From f9b595adbdc3f6f51ffa2c1f2aa70d0262d0db2d Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Fri, 3 Jul 2020 13:34:52 +0100 Subject: COMPMID-3532: Align data type support between doxygen and implementation - NEON Change-Id: I70662cfb43890873b706b3f22b348f5d8cdd63ca Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3506 Tested-by: Arm Jenkins Reviewed-by: Manuel Bottini Reviewed-by: Sheri Zhang Comments-Addressed: Arm Jenkins --- .../NEON/kernels/NEArithmeticSubtractionKernel.h | 12 +++++ .../core/NEON/kernels/NEChannelExtractKernel.h | 6 +-- .../NEON/kernels/NEDequantizationLayerKernel.h | 6 +-- .../NEON/kernels/NEElementwiseOperationKernel.h | 38 +++++++-------- .../core/NEON/kernels/NEElementwiseUnaryKernel.h | 4 +- .../core/NEON/kernels/NEGEMMLowpReductionKernel.h | 10 ++-- .../core/NEON/kernels/NEGaussian3x3Kernel.h | 4 +- arm_compute/core/NEON/kernels/NEIm2ColKernel.h | 4 +- .../core/NEON/kernels/NEMaxUnpoolingLayerKernel.h | 11 ++--- .../NEON/kernels/NEPixelWiseMultiplicationKernel.h | 56 +++++++++++++--------- .../core/NEON/kernels/NEReductionOperationKernel.h | 4 +- .../core/NEON/kernels/NEUpsampleLayerKernel.h | 4 +- .../core/NEON/kernels/NEWeightsReshapeKernel.h | 4 +- .../NEON/functions/NEArithmeticSubtraction.h | 24 ++++++++++ arm_compute/runtime/NEON/functions/NECast.h | 18 +++---- .../runtime/NEON/functions/NEDequantizationLayer.h | 6 +-- .../NEON/functions/NEDetectionPostProcessLayer.h | 24 +++++----- .../NEON/functions/NEElementwiseOperations.h | 4 +- .../NEON/functions/NEElementwiseUnaryLayer.h | 4 +- .../NEON/functions/NEGEMMConvolutionLayer.h | 16 +++---- arm_compute/runtime/NEON/functions/NEIm2Col.h | 4 +- .../NEON/functions/NEPixelWiseMultiplication.h | 56 +++++++++++++--------- .../runtime/NEON/functions/NEPoolingLayer.h | 6 +-- .../runtime/NEON/functions/NEReductionOperation.h | 13 +++-- arm_compute/runtime/NEON/functions/NEScale.h | 8 ++-- arm_compute/runtime/NEON/functions/NEUnstack.h | 10 ++-- .../NEON/kernels/NEAbsoluteDifferenceKernel.cpp | 11 ++--- src/core/NEON/kernels/NEActivationLayerKernel.cpp | 12 ++--- .../NEON/kernels/NEBatchConcatenateLayerKernel.cpp | 4 -- .../NEON/kernels/NEBatchToSpaceLayerKernel.cpp | 3 -- .../NEDepthwiseConvolutionLayerNativeKernel.cpp | 4 +- src/core/NEON/kernels/NEDilateKernel.cpp | 14 +++--- src/core/NEON/kernels/NEErodeKernel.cpp | 14 +++--- src/core/NEON/kernels/NEFlattenLayerKernel.cpp | 11 ++--- .../kernels/NEFuseBatchNormalizationKernel.cpp | 1 - .../NEON/kernels/NEGEMMLowpReductionKernel.cpp | 11 +---- src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp | 15 ++---- .../NEON/kernels/NEMaxUnpoolingLayerKernel.cpp | 33 +++++-------- .../kernels/NEPixelWiseMultiplicationKernel.cpp | 11 +++-- src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp | 7 +-- src/core/NEON/kernels/NEReverseKernel.cpp | 34 +++---------- src/core/NEON/kernels/NEWeightsReshapeKernel.cpp | 15 ++---- src/runtime/NEON/functions/NERNNLayer.cpp | 1 + 44 files changed, 269 insertions(+), 290 deletions(-) diff --git a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h index dfd08d9b06..4872edd90f 100644 --- a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h +++ b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h @@ -73,6 +73,18 @@ public: */ void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy); /** 
Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtractionKernel + * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (QASYMM8, QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED, QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (S16,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,S16) -> S16 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 * * @note Convert policy cannot be WRAP if datatype is QASYMM8 * diff --git a/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h b/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h index 8d62016fe5..d953ff33ed 100644 --- a/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h +++ b/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -27,8 +27,6 @@ #include "arm_compute/core/NEON/INESimpleKernel.h" #include "arm_compute/core/Types.h" -#include - namespace arm_compute { class IMultiImage; @@ -60,7 +58,7 @@ public: * * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 * @param[in] channel Channel to extract. - * @param[out] output Destination tensor. Format supported: u8 + * @param[out] output Destination tensor. Format supported: U8 */ void configure(const ITensor *input, Channel channel, ITensor *output); /** Set the input and output of the kernel diff --git a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h index 3792fb3bd7..2a85da28de 100644 --- a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -52,13 +52,13 @@ public: ~NEDequantizationLayerKernel() = default; /** Set input, output tensors. * - * @param[in] input Source tensor. Data type supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32. */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEDequantizationLayerKernel * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. * @param[in] output Output tensor info. Data types supported: F16/F32. * * @return a status diff --git a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h index b109ddd0f8..12d7fb50a9 100644 --- a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h +++ b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h @@ -59,10 +59,10 @@ public: /** Common signature for all the specialised arithmetic functions * - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Dependent on subclass. - * @param[in] window Region on which to execute the kernel. 
+ * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[out] output Output tensor info. Data types supported: Dependent on subclass. + * @param[in] window Region on which to execute the kernel. */ using ElementwiseFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); @@ -100,10 +100,10 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * @param[in] op Arithmetic operation to be executed. + * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[out] output Output tensor info. Data types supported: Same as @p input1. */ void configure(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); @@ -131,9 +131,9 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel * - * @param[in] input1 First tensor input info. Data types supported: F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * @param[in] input1 First tensor input info. Data types supported: F16/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[out] output Output tensor info. Data types supported: Same as @p input1. */ void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); @@ -168,9 +168,9 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel * - * @param[in] input1 First tensor input info. Data types supported: F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: Same as @p input1. + * @param[in] input1 First tensor input info. Data types supported: F16/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info. Data types supported: Same as @p input1. * * @return a Status */ @@ -189,10 +189,10 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel * - * @param[in] op Comparison operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: U16/U32. + * @param[in] op Comparison operation to be executed. + * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[out] output Output tensor info. 
Data types supported: U8. */ void configure(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); @@ -201,7 +201,7 @@ public: * @param[in] op Comparison operation to be executed. * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: U16/U32. + * @param[in] output Output tensor info. Data types supported: U8. * * @return a Status */ diff --git a/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h b/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h index 02c390b6ba..c63618c256 100644 --- a/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h +++ b/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h @@ -60,7 +60,7 @@ public: /** Function to configure the @ref NEElementwiseUnaryKernel * * @param[in] op Arithmetic operation to be executed. - * @param[in] input First tensor input. Data types supported: F16/F32. + * @param[in] input First tensor input. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations. * @param[out] output Output tensor. Data types supported: Same as @p input. */ void configure(ElementWiseUnary op, const ITensor *input, ITensor *output); @@ -68,7 +68,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEElementwiseUnaryKernel * * @param[in] op Arithmetic operation to be executed. - * @param[in] input First tensor input info. Data types supported: F16/F32. + * @param[in] input First tensor input info. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations. * @param[in] output Output tensor info. Data types supported: Same as @p input. * * @return a Status diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h index 1e472f5252..dcee3da2d5 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h @@ -49,7 +49,7 @@ public: /** Initialise the kernel's input and output. * - * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32 * @param[in] info Kernel metadata: * - k Number of matrix columns/rows depending on the type of reduction. @@ -82,7 +82,7 @@ public: } /** Initialise the kernel's input and output. * - * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 * @param[in] info Kernel metadata: * - k (num_mtx_a_cols) Number of matrix A columns @@ -93,7 +93,7 @@ public: void configure(const ITensor *mtx_a, ITensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override; /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixAReductionKernel * - * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] mtx_a Input tensor. 
Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
 * @param[in] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
 * @param[in] info Kernel metadata:
 * - k (num_mtx_a_cols) Number of matrix A columns
@@ -131,7 +131,7 @@ public:
 }
 /** Initialise the kernel's input and output.
 *
- * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] mtx_b Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
 * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
 * @param[in] info Kernel metadata:
 * - k (num_mtx_b_rows) Number of matrix B rows.
 void configure(const ITensor *mtx_b, ITensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override;
 /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixBReductionKernel
 *
- * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] mtx_b Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
 * @param[in] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
 * @param[in] info Kernel metadata:
 * - k (num_mtx_b_rows) Number of matrix B rows.
diff --git a/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h b/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h
index fa92eef1b7..099b226d2f 100644
--- a/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h
+++ b/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -41,7 +41,7 @@ public:
 /** Set the source, destination and border mode of the kernel
 *
 * @param[in] input Source tensor. Data type supported: U8
- * @param[out] output Destination tensor. Data type supported: S16
+ * @param[out] output Destination tensor. Data type supported: same as @p input
 * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
 */
 void configure(const ITensor *input, ITensor *output, bool border_undefined);
diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
index 1c358b379d..97cdfb4958 100644
--- a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
+++ b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
@@ -79,7 +79,7 @@ public:
 * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
 * while every optional dimension from 4 and above represent a batch of inputs.
 * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32
- * Note: QASYMM8 works only for has_bias = false
+ * Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false
 * @param[out] output The output tensor. Data types supported: Same as @p input
 * @param[in] kernel_dims The kernel dimensions (width and height).
 * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
@@ -94,7 +94,7 @@ public:
 * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
 * while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32 - * Note: QASYMM8 works only for has_bias = false + * Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false * @param[in] output The output tensor. Data types supported: Same as @p input * @param[in] kernel_dims The kernel dimensions (width and height). * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. diff --git a/arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h index 269317b6c1..7160d5d328 100644 --- a/arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h @@ -54,7 +54,7 @@ public: * * @note Output shape must be equal to the shape of the original input to pool. * - * @param[in] input Source tensor. Data types supported: F16/F32. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[out] indices The indices of the maximal values. Data type supported: U32. * @param[out] output Destination tensor. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. @@ -62,9 +62,9 @@ public: void configure(const ITensor *input, const ITensor *indices, ITensor *output, const PoolingLayerInfo &pool_info); /** Static function to check if given info will lead to a valid configuration of @ref NEMaxUnpoolingLayerKernel * - * @param[in] input Source tensor. Data types supported: F16/F32. - * @param[in] output Destination tensor. Data types supported: Same as @p input. - * @param[in] indices The indices of the maximal values. Data type supported: U32. + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] output Destination tensor info. Data types supported: Same as @p input. + * @param[in] indices Tensor info of the indices of the maximal values. Data type supported: U32. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. * * @return a status @@ -89,9 +89,6 @@ private: const ITensor *_input; ITensor *_output; const ITensor *_indices; - PoolingLayerInfo _pool_info; - DataLayout _data_layout; - unsigned int _num_elems_processed_per_iteration; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEMAXUNPOOLINGLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h index 5483fae565..2263e480a9 100644 --- a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h +++ b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h @@ -52,21 +52,27 @@ public: /** Default destructor */ ~NEPixelWiseMultiplicationKernel() = default; /** Initialise the kernel's input, output and border mode. + * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,U8) -> S16 + * - (S16,S16) -> S16 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * - (QASYMM8,QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (QSYMM16,QSYMM16) -> QSYMM16 + * - (QSYMM16,QSYMM16) -> S32 * * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported. * For all other scale values only round to zero (implemented as round towards minus infinity) is supported. * - * @param[in] input1 An input tensor. 
Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 - * @param[in] input2 An input tensor. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, QSYMM16 (only if @p input1 is QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32). - * @param[out] output Output tensor. Data types supported: - * - U8, only if both inputs are U8. - * - QASYMM8, only if both inputs are QASYMM8. - * - QASYMM8_SIGNED, only if @p input1 is QASYMM8_SIGNED. - * - S16. - * - QSYMM16, only if both inputs are QSYMM16. - * - S32, only if both inputs are QSYMM16. - * - F16, only if @p input1 is F16. - * - F32, only if both inputs are F32. + * @param[in] input1 First input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 + * @param[in] input2 Second input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 + * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32/S32 * @param[in] scale Scale to apply after multiplication. * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8, QASYMM8_SIGNED or QSYMM16. @@ -74,21 +80,27 @@ public: */ void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); /** Static function to check if given info will lead to a valid configuration of @ref NEPixelWiseMultiplicationKernel + * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,U8) -> S16 + * - (S16,S16) -> S16 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * - (QASYMM8,QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (QSYMM16,QSYMM16) -> QSYMM16 + * - (QSYMM16,QSYMM16) -> S32 * * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported. * For all other scale values only round to zero (implemented as round towards minus infinity) is supported. * - * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 - * @param[in] input2 An input tensor info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, QSYMM16 (only if @p input1 is QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32). - * @param[in] output Output tensor info. Data types supported: - * - U8, only if both inputs are U8. - * - QASYMM8, only if both inputs are QASYMM8. - * - QASYMM8_SIGNED, only if @p input1 is QASYMM8_SIGNED. - * - S16. - * - QSYMM16, only if both inputs are QSYMM16. - * - S32, only if both inputs are QSYMM16. - * - F16, only if @p input1 is F16. - * - F32, only if both inputs are F32. + * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 + * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 + * @param[in] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32/S32 * @param[in] scale Scale to apply after multiplication. * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. * @param[in] overflow_policy Overflow policy. 
ConvertPolicy cannot be WRAP if datatype is QASYMM8, QASYMM8_SIGNED or QSYMM16. diff --git a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h b/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h index 28cca4987b..523c812f7d 100644 --- a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h +++ b/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h @@ -59,7 +59,7 @@ public: /** Set the source, destination of the kernel * - * @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32. Data layouts supported: NCHW. + * @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. * @param[out] output Destination tensor.Data types and data layouts supported: same as @p input, S32 for ARG_MIX/ARG_MAX. * Output will have the same number of dimensions as input. * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 @@ -69,7 +69,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperationKernel. * - * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32. Data layouts supported: NCHW. + * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. * @param[in] output Destination tensor info.Data types and data layouts supported: same as @p input, S32 for ARG_MIX/ARG_MAX. * Output will have the same number of dimensions as input. * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 diff --git a/arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h b/arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h index 9038eda9b2..5bd702aef6 100644 --- a/arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h @@ -52,7 +52,7 @@ public: ~NEUpsampleLayerKernel() = default; /** Set the input output tensors. * - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[out] output Destination tensor. Data types supported: same as @p input. * @param[in] info Contains stride information described in @ref Size2D. * @param[in] policy Defines the policy to fill the intermediate pixels. @@ -61,7 +61,7 @@ public: void configure(const ITensor *input, ITensor *output, const Size2D &info, const InterpolationPolicy policy); /** Static function to check if given info will lead to a valid configuration of @ref NEUpsampleLayerKernel * - * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32. + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] output Destination tensor info. Data types supported: same as @p input. * @param[in] info Contains stride information described in @ref Size2D. * @param[in] policy Defines the policy to fill the intermediate pixels. diff --git a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h index b68cb50c7b..c6e4053293 100644 --- a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h +++ b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h @@ -76,7 +76,7 @@ public: * * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. 
- * Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/FP16/F32 + * Data types supported: All * @param[in] bias The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. @@ -87,7 +87,7 @@ public: * * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. - * Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32 + * Data types supported: All * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h index 0bab911c1a..90a33d9c2f 100644 --- a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h +++ b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h @@ -46,6 +46,18 @@ class NEArithmeticSubtraction : public INEOperator { public: /** Initialise the kernel's inputs, output and conversion policy. + * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (QASYMM8, QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED, QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (S16,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,S16) -> S16 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 * * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 * @param[in] input2 Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 @@ -55,6 +67,18 @@ public: */ void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtraction + * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (QASYMM8, QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED, QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (S16,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,S16) -> S16 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 * * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32 * @param[in] input2 Second tensor input. 
Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32 diff --git a/arm_compute/runtime/NEON/functions/NECast.h b/arm_compute/runtime/NEON/functions/NECast.h index 55c21a01ec..a124452c72 100644 --- a/arm_compute/runtime/NEON/functions/NECast.h +++ b/arm_compute/runtime/NEON/functions/NECast.h @@ -44,23 +44,23 @@ public: * Valid conversions Input -> Output : * * - QASYMM8_SIGNED -> S16, S32, F32, F16 - * - QASYMM8 -> U16, S16, S32, F32, F16 - * - U8 -> U16, S16, S32, F32, F16 - * - U16 -> U8, U32 - * - S16 -> QASYMM8_SIGNED, U8, S32 - * - F16 -> QASYMM8_SIGNED, QASYMM8, F32, S32, U8 - * - S32 -> QASYMM8_SIGNED, QASYMM8, F16, F32, U8 - * - F32 -> QASYMM8_SIGNED, QASYMM8, F16, S32, U8 + * - QASYMM8 -> U16, S16, S32, F32, F16 + * - U8 -> U16, S16, S32, F32, F16 + * - U16 -> U8, U32 + * - S16 -> QASYMM8_SIGNED, U8, S32 + * - F16 -> QASYMM8_SIGNED, QASYMM8, F32, S32, U8 + * - S32 -> QASYMM8_SIGNED, QASYMM8, F16, F32, U8 + * - F32 -> QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8 * * @param[in] input The input tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/F16/S32/F32. - * @param[out] output The output tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/S8/U16/S16/U32/S32/F16/F32. + * @param[out] output The output tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/S8/U16/S16/U32/S32/BFLOAT16/F16/F32. * @param[in] policy Conversion policy. */ void configure(ITensor *input, ITensor *output, ConvertPolicy policy); /** Static function to check if given info will lead to a valid configuration of @ref NECast * * @param[in] input Source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/F16/S32/F32. - * @param[in] output Destination tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/U8/S8/U16/S16/U32/S32/F16/F32. + * @param[in] output Destination tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/U8/S8/U16/S16/U32/S32/BFLOAT16/F16/F32. * @param[in] policy Conversion policy. * * @return a status diff --git a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h index f8d0ce8b2d..4ac8e72fda 100644 --- a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -39,13 +39,13 @@ class NEDequantizationLayer : public INESimpleFunctionNoBorder public: /** Configure the kernel. * - * @param[in] input Source tensor. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32. */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEDequantizationLayer * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. * @param[in] output Output tensor info. Data type supported: F16/F32. 
* * @return a status diff --git a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h index d616762a5a..10dddc7afb 100644 --- a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -55,9 +55,9 @@ public: NEDetectionPostProcessLayer &operator=(const NEDetectionPostProcessLayer &) = delete; /** Configure the detection output layer NE function * - * @param[in] input_box_encoding The bounding box input tensor. Data types supported: F32, QASYMM8. - * @param[in] input_score The class prediction input tensor. Data types supported: Same as @p input_box_encoding. - * @param[in] input_anchors The anchors input tensor. Data types supported: Same as @p input_box_encoding. + * @param[in] input_box_encoding The bounding box input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32. + * @param[in] input_score The class prediction input tensor. Data types supported: same as @p input_box_encoding. + * @param[in] input_anchors The anchors input tensor. Data types supported: same as @p input_box_encoding. * @param[out] output_boxes The boxes output tensor. Data types supported: F32. * @param[out] output_classes The classes output tensor. Data types supported: Same as @p output_boxes. * @param[out] output_scores The scores output tensor. Data types supported: Same as @p output_boxes. @@ -70,14 +70,14 @@ public: ITensor *output_boxes, ITensor *output_classes, ITensor *output_scores, ITensor *num_detection, DetectionPostProcessLayerInfo info = DetectionPostProcessLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEDetectionPostProcessLayer * - * @param[in] input_box_encoding The bounding box input tensor info. Data types supported: F32, QASYMM8. - * @param[in] input_class_score The class prediction input tensor info. Data types supported: F32, QASYMM8. - * @param[in] input_anchors The anchors input tensor. Data types supported: F32, QASYMM8. - * @param[out] output_boxes The output tensor. Data types supported: F32. - * @param[out] output_classes The output tensor. Data types supported: Same as @p output_boxes. - * @param[out] output_scores The output tensor. Data types supported: Same as @p output_boxes. - * @param[out] num_detection The number of output detection. Data types supported: Same as @p output_boxes. - * @param[in] info (Optional) DetectionPostProcessLayerInfo information. + * @param[in] input_box_encoding The bounding box input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32. + * @param[in] input_class_score The class prediction input tensor info. Data types supported: same as @p input_box_encoding. + * @param[in] input_anchors The anchors input tensor info. Data types supported: same as @p input_box_encoding. + * @param[in] output_boxes The output tensor info. Data types supported: F32. + * @param[in] output_classes The output tensor info. Data types supported: Same as @p output_boxes. + * @param[in] output_scores The output tensor info. Data types supported: Same as @p output_boxes. + * @param[in] num_detection The number of output detection tensor info. Data types supported: Same as @p output_boxes. + * @param[in] info (Optional) DetectionPostProcessLayerInfo information. 
* * @return a status */ diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h index 08f798ec6e..d24f975052 100644 --- a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h +++ b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h @@ -292,7 +292,7 @@ public: * * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. - * @param[out] output Output tensor. Data types supported: U16/U32. + * @param[out] output Output tensor. Data types supported: U8. * @param[in] op Comparison Operation to be performed. */ void configure(ITensor *input1, ITensor *input2, ITensor *output, ComparisonOperation op); @@ -300,7 +300,7 @@ public: * * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: U16/U32. + * @param[in] output Output tensor info. Data types supported: U8. * @param[in] op Comparison Operation to be performed. * * @return a status diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h index 1fd24887a5..762329d833 100644 --- a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h +++ b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h @@ -96,13 +96,13 @@ class NELogLayer : public INESimpleFunctionNoBorder public: /** Initialize the function * - * @param[in] input Input tensor. Data types supported: F16/F32/S32. + * @param[in] input Input tensor. Data types supported: F16/F32. * @param[out] output Output tensor. Data types supported: same as @p input. */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NELogLayer * - * @param[in] input First tensor input info. Data types supported: F16/F32/S32. + * @param[in] input First tensor input info. Data types supported: F16/F32. * @param[in] output Output tensor info. Data types supported: Same as @p input. * * @return a status diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h index e7da1006e0..8bff3ba431 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h @@ -64,21 +64,21 @@ public: /** Set the input and output tensors. * * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32. + * Data type supported: All. * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED, FP32 if @p weights is BFLOAT16 - * @param[out] output Destination tensor. - * Data types supported: Same as @p weights, FP32 if @p weights is BFLOAT16 + * Data type supported: same as @p weights. + * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. + * @param[out] output Destination tensor. Data types supported: same as @p weights. 
*/
 void configure(const ITensor *weights, const ITensor *biases, ITensor *output);
 /** Static function to check if given info will lead to a valid configuration of @ref NEConvolutionLayerReshapeWeights
 *
 * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
+ * Data type supported: All.
 * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED, FP32 if @p weights is BFLOAT16
- * @param[in] output Destination tensor.
- * Data types supported: Same as @p weights FP32 if @p weights is BFLOAT16
+ * Data type supported: same as @p weights.
+ * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
+ * @param[in] output Destination tensor. Data types supported: same as @p weights.
 *
 * @return an error status
 */
diff --git a/arm_compute/runtime/NEON/functions/NEIm2Col.h b/arm_compute/runtime/NEON/functions/NEIm2Col.h
index cb905a3652..79abcdb1e2 100644
--- a/arm_compute/runtime/NEON/functions/NEIm2Col.h
+++ b/arm_compute/runtime/NEON/functions/NEIm2Col.h
@@ -46,7 +46,7 @@ public:
 * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
 * while every optional dimension from 4 and above represent a batch of inputs.
 * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32
- * Note: QASYMM8 works only for has_bias = false
+ * Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false
 * @param[out] output The output tensor. Data types supported: Same as @p input
 * @param[in] kernel_dims The kernel dimensions (width and height).
 * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
@@ -61,7 +61,7 @@ public:
 * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
 * while every optional dimension from 4 and above represent a batch of inputs.
 * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32
- * Note: QASYMM8 works only for has_bias = false
+ * Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false
 * @param[in] output The output tensor. Data types supported: Same as @p input
 * @param[in] kernel_dims The kernel dimensions (width and height).
 * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
diff --git a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
index 3b1209356a..4b32f964fe 100644
--- a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
+++ b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
@@ -39,23 +39,29 @@ class NEPixelWiseMultiplication : public INEOperator
 {
 public:
 /** Initialise the kernel's inputs, output and conversion policy.
+ *
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,U8) -> S16
+ * - (S16,S16) -> S16
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ * - (QASYMM8,QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (QSYMM16,QSYMM16) -> QSYMM16
+ * - (QSYMM16,QSYMM16) -> S32
 *
 * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
* For all other scale values only round to zero (implemented as round towards minus infinity) is supported. * - * @param[in, out] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 + * @param[in, out] input1 First input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 * This input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 An input tensor. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, QSYMM16 (only if @p input1 is QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32). + * @param[in, out] input2 Second input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 * This input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output Output tensor. Data types supported: - * - U8, only if both inputs are U8. - * - QASYMM8, only if both inputs are QASYMM8. - * - QASYMM8_SIGNED, only if @p input1 is QASYMM8_SIGNED. - * - S16. - * - QSYMM16, only if both inputs are QSYMM16. - * - S32, only if both inputs are QSYMM16. - * - F16, only if @p input1 is F16. - * - F32, only if both inputs are F32. + * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32/S32 * @param[in] scale Scale to apply after multiplication. * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8, QASYMM8_SIGNED or QSYMM16. @@ -65,21 +71,27 @@ public: void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEPixelWiseMultiplication + * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,U8) -> S16 + * - (S16,S16) -> S16 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * - (QASYMM8,QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (QSYMM16,QSYMM16) -> QSYMM16 + * - (QSYMM16,QSYMM16) -> S32 * * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported. * For all other scale values only round to zero (implemented as round towards minus infinity) is supported. * - * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 - * @param[in] input2 An input tensor info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, QSYMM16 (only if both inputs are QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32). - * @param[in] output Output tensor info. Data types supported: - * - U8, only if both inputs are U8. - * - QASYMM8, only if both inputs are QASYMM8. - * - QASYMM8_SIGNED, only if @p input1 is QASYMM8_SIGNED. - * - S16. - * - QSYMM16, only if both inputs are QSYMM16. - * - S32, only if both inputs are QSYMM16. - * - F16, only if @p input1 is F16. - * - F32, only if both inputs are F32. + * @param[in] input1 First input tensor info. 
Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 + * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 + * @param[in] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32/S32 * @param[in] scale Scale to apply after multiplication. * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8, QASYMM8_SIGNED or QSYMM16. diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h index e43741c95b..8d018b77a4 100644 --- a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h @@ -58,10 +58,10 @@ public: * * @note F16 is supported for pool sizes 2 and 3 only * - * @param[in] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] output Destination tensor. Data types supported: Same as @p input. + * @param[in] input Source tensor info. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] output Destination tensor info. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32. + * @param[in] indices (optional) Tensor info of the indices of the maximal values. Data type supported: U32. * * @return a status */ diff --git a/arm_compute/runtime/NEON/functions/NEReductionOperation.h b/arm_compute/runtime/NEON/functions/NEReductionOperation.h index 78e8b04dbb..a7c6e238c4 100644 --- a/arm_compute/runtime/NEON/functions/NEReductionOperation.h +++ b/arm_compute/runtime/NEON/functions/NEReductionOperation.h @@ -28,7 +28,6 @@ #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h" -#include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" #include "arm_compute/runtime/Tensor.h" @@ -49,17 +48,17 @@ public: NEReductionOperation(std::shared_ptr memory_manager = nullptr); /** Set the input and output tensors. * - * @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0) - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. - * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0 - * @param[in] op Reduction operation to perform. - * @param[in] keep_dims (Optional) Whether to keep the reduced dimension after the operation. Defaults to true. + * @param[in, out] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. (Written to only for border_size != 0) + * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. + * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0 + * @param[in] op Reduction operation to perform. + * @param[in] keep_dims (Optional) Whether to keep the reduced dimension after the operation. Defaults to true. 
*/ void configure(ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op, bool keep_dims = true); /** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperation. * - * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0) + * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0 * @param[in] op Reduction operation to perform. diff --git a/arm_compute/runtime/NEON/functions/NEScale.h b/arm_compute/runtime/NEON/functions/NEScale.h index 30bcdc168b..901001fd76 100644 --- a/arm_compute/runtime/NEON/functions/NEScale.h +++ b/arm_compute/runtime/NEON/functions/NEScale.h @@ -47,7 +47,7 @@ public: NEScale(); /** Initialize the function's source, destination, interpolation type and border_mode. * - * @param[in, out] input Source tensor. Data type supported: U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) + * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) * @param[out] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. * @param[in] policy The interpolation type. * @param[in] border_mode Strategy to use for borders. @@ -61,14 +61,14 @@ public: SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool use_padding = true, bool align_corners = false); /** Initialize the function's source, destination, interpolation type and border_mode. * - * @param[in, out] input Source tensor. Data type supported: U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) + * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) * @param[out] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. * @param[in] info @ref ScaleKernelInfo to be used for configuration */ void configure(ITensor *input, ITensor *output, const ScaleKernelInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref NEScale * - * @param[in] input Source tensor. Data type supported: U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) + * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) * @param[in] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. * @param[in] policy The interpolation type. * @param[in] border_mode Strategy to use for borders. 
@@ -84,7 +84,7 @@ public: PixelValue constant_border_value = PixelValue(), SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool use_padding = true, bool align_corners = false); /** Static function to check if given info will lead to a valid configuration of @ref NEScale * - * @param[in] input Source tensor. Data type supported: U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) + * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) * @param[in] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. * @param[in] info @ref ScaleKernelInfo to be used for validation * diff --git a/arm_compute/runtime/NEON/functions/NEUnstack.h b/arm_compute/runtime/NEON/functions/NEUnstack.h index dbb04f08e5..6f8560b2bd 100644 --- a/arm_compute/runtime/NEON/functions/NEUnstack.h +++ b/arm_compute/runtime/NEON/functions/NEUnstack.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -47,8 +47,8 @@ public: NEUnstack(); /** Set the input, output and unstacking axis. * - * @param[in] input A tensor to be unstacked. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. - * @param[in,out] output_vector A vector of tensors. Data types supported: Same as @p input. + * @param[in] input A tensor to be unstacked. Data type supported: All. + * @param[in,out] output_vector A vector of tensors. Data types supported: same as @p input. * Note: The number of elements of the vector will be used as the number of slices to be taken from the axis. * @param[in] axis The axis to unstack along. Valid values are [-R,R) where R is the input's rank. Negative values wrap around. * @@ -56,8 +56,8 @@ public: void configure(const ITensor *input, const std::vector &output_vector, int axis); /** Static function to check if given info will lead to a valid configuration of @ref NEUnstack * - * @param[in] input Input tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[in] output_vector Vector of output tensors' info. Data types supported: Same as @p input. + * @param[in] input Input tensor info. Data type supported: All. + * @param[in] output_vector Vector of output tensors' info. Data types supported: same as @p input. * @param[in] axis The axis to unstack along. Valid values are [-R,R) where R is the input's rank. Negative values wrap around. * * @return a status diff --git a/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp b/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp index 62285e0578..28f30717ab 100644 --- a/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp +++ b/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -32,15 +32,9 @@ #include "arm_compute/core/Validate.h" #include -#include - -using namespace arm_compute; namespace arm_compute { -class Coordinates; -} // namespace arm_compute - namespace { void abs_diff_U8_U8_U8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) @@ -140,7 +134,7 @@ void NEAbsoluteDifferenceKernel::configure(const ITensor *input1, const ITensor { set_format_if_unknown(*output->info(), Format::S16); } - else if(input1->info()->data_type() == DataType::F32 || input2->info()->data_type() == DataType::F32) + else if(input1->info()->data_type() == DataType::U8 || input2->info()->data_type() == DataType::U8) { set_format_if_unknown(*output->info(), Format::U8); } @@ -210,3 +204,4 @@ void NEAbsoluteDifferenceKernel::run(const Window &window, const ThreadInfo &inf _func(_input1, _input2, _output, window); } +} // namespace arm_compute diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp index 43426dc122..1db9ff9e3f 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp @@ -27,28 +27,23 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/NEON/NEAsymm.h" -#include "arm_compute/core/NEON/NEFixedPoint.h" -#include "arm_compute/core/NEON/NEMath.h" #include "arm_compute/core/NEON/NESymm.h" #include "arm_compute/core/NEON/wrapper/wrapper.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include -#include -#include -#include #include -using namespace arm_compute; +namespace arm_compute +{ namespace { Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &activation_info) { ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32); const static std::set qasymm8_supported_activations = { @@ -874,3 +869,4 @@ void NEActivationLayerKernel::run_op(const InputTensorMap &inputs, (this->*_func)(inputs.at(TensorType::ACL_SRC), outputs.at(TensorType::ACL_DST), window); } +} // namespace arm_compute diff --git a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp index 5650b810d2..a5a574de63 100644 --- a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp @@ -26,17 +26,13 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/ITensor.h" #include "arm_compute/core/NEON/NEAsymm.h" -#include "arm_compute/core/NEON/NEFixedPoint.h" #include "arm_compute/core/NEON/wrapper/wrapper.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" -#include - namespace arm_compute { namespace diff --git a/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp b/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp index c4c0f01f0f..4ad3dd76f9 100644 --- 
a/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp @@ -25,12 +25,9 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/wrapper/wrapper.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include -#include using namespace arm_compute::misc::shape_calculator; diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp index ef196ab904..a639a926ec 100644 --- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp @@ -427,12 +427,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, if(is_data_type_quantized_per_channel(weights->data_type())) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QSYMM8_PER_CHANNEL); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != weights->quantization_info().scale().size()); } else { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); } if(biases != nullptr) @@ -454,6 +453,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, { const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); } return Status{}; diff --git a/src/core/NEON/kernels/NEDilateKernel.cpp b/src/core/NEON/kernels/NEDilateKernel.cpp index e761815f9e..b4cc699c8f 100644 --- a/src/core/NEON/kernels/NEDilateKernel.cpp +++ b/src/core/NEON/kernels/NEDilateKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -30,16 +30,9 @@ #include "arm_compute/core/Validate.h" #include -#include -#include - -using namespace arm_compute; namespace arm_compute { -class Coordinates; -} // namespace arm_compute - BorderSize NEDilateKernel::border_size() const { return BorderSize(1); @@ -47,6 +40,10 @@ BorderSize NEDilateKernel::border_size() const void NEDilateKernel::configure(const ITensor *input, ITensor *output, bool border_undefined) { + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + _input = input; _output = output; @@ -126,3 +123,4 @@ void NEDilateKernel::run(const Window &window, const ThreadInfo &info) }, in, out); } +} // namespace arm_compute diff --git a/src/core/NEON/kernels/NEErodeKernel.cpp b/src/core/NEON/kernels/NEErodeKernel.cpp index 2a538ecd0f..edfcbb50c4 100644 --- a/src/core/NEON/kernels/NEErodeKernel.cpp +++ b/src/core/NEON/kernels/NEErodeKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,16 +30,9 @@ #include "arm_compute/core/Validate.h" #include -#include -#include - -using namespace arm_compute; namespace arm_compute { -class Coordinates; -} // namespace arm_compute - BorderSize NEErodeKernel::border_size() const { return BorderSize(1); @@ -47,6 +40,10 @@ BorderSize NEErodeKernel::border_size() const void NEErodeKernel::configure(const ITensor *input, ITensor *output, bool border_undefined) { + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + _input = input; _output = output; @@ -126,3 +123,4 @@ void NEErodeKernel::run(const Window &window, const ThreadInfo &info) }, in, out); } +} // namespace arm_compute diff --git a/src/core/NEON/kernels/NEFlattenLayerKernel.cpp b/src/core/NEON/kernels/NEFlattenLayerKernel.cpp index a48601f7b0..9dbf245c7a 100644 --- a/src/core/NEON/kernels/NEFlattenLayerKernel.cpp +++ b/src/core/NEON/kernels/NEFlattenLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -33,19 +33,17 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include - -using namespace arm_compute; +namespace arm_compute +{ using namespace misc::shape_calculator; namespace { Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); // Checks performed when output is configured if(output->total_size() != 0) @@ -135,3 +133,4 @@ void NEFlattenLayerKernel::run(const Window &window, const ThreadInfo &info) } while(in_window.slide_window_slice_3D(in_slice) && out_window.slide_window_slice_1D(out_slice)); } +} // namespace arm_compute diff --git a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp index 6e7e5ab23f..b71630dba3 100644 --- a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp @@ -32,7 +32,6 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" -#include "utils/TypePrinter.h" #include namespace arm_compute diff --git a/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp index 1acdb1efce..a8a976cd6b 100644 --- a/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp @@ -24,19 +24,10 @@ #include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h" #include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/NEON/wrapper/wrapper.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" - -#include -#include namespace arm_compute { @@ -45,7 +36,7 @@ namespace Status validate_arguments_matrix_a_reduction(const ITensorInfo *input, const ITensorInfo 
*output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8, DataType::QSYMM8_PER_CHANNEL); if(output->total_size() > 0) { diff --git a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp index 88104f7297..9080051e93 100644 --- a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -24,23 +24,16 @@ #include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include -#include -#include - -using namespace arm_compute; +namespace arm_compute +{ namespace { TensorShape get_output_shape(const ITensorInfo *input) @@ -57,7 +50,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. 
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); if(output->total_size() != 0) { @@ -192,3 +184,4 @@ void NEGEMMTranspose1xWKernel::run(const Window &window, const ThreadInfo &info) } } } +} // namespace arm_compute diff --git a/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp b/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp index 1967c553bd..1f65e3260b 100644 --- a/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp @@ -23,16 +23,8 @@ */ #include "arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CPP/Validate.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/NEAsymm.h" -#include "arm_compute/core/NEON/NEFixedPoint.h" -#include "arm_compute/core/NEON/NEMath.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" @@ -48,6 +40,10 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output, indices); + ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32); + int pool_stride_x = 0; int pool_stride_y = 0; PoolingType pool_type = pool_info.pool_type; @@ -56,10 +52,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c const int pool_size_x = pool_info.pool_size.width; const int pool_size_y = pool_info.pool_size.height; const Size2D pool_size(pool_size_x, pool_size_y); - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(pool_type != PoolingType::MAX, "Pooling indices only supported for MAX pooling method"); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MSG((pool_size != Size2D(2, 2)), "Pooling indices only supported for pool size 2x2"); if(output->total_size() != 0) { @@ -72,20 +66,19 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c } // namespace NEMaxUnpoolingLayerKernel::NEMaxUnpoolingLayerKernel() - : _func(nullptr), _input(nullptr), _output(nullptr), _indices(nullptr), _pool_info(), _data_layout(DataLayout::UNKNOWN), _num_elems_processed_per_iteration(0) + : _func(nullptr), _input(nullptr), _output(nullptr), _indices(nullptr) { } void NEMaxUnpoolingLayerKernel::configure(const ITensor *input, const ITensor *indices, ITensor *output, const PoolingLayerInfo &pool_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - const Size2D pool_size(pool_info.pool_size.width, pool_info.pool_size.height); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info, indices->info())); - _input = input; - _output = output; - _indices = indices; - _pool_info = pool_info; - _data_layout = input->info()->data_layout(); + + _input = input; + _output = output; + _indices = indices; + switch(input->info()->data_type()) { case DataType::F32: @@ -107,8 
+100,8 @@ void NEMaxUnpoolingLayerKernel::configure(const ITensor *input, const ITensor *i } const TensorShape output_shape = compute_unpool_shape(*input->info(), pool_info); auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape)); - _num_elems_processed_per_iteration = 1; - auto window = calculate_max_window(*input->info(), Steps(_num_elems_processed_per_iteration)); + + auto window = calculate_max_window(*input->info(), Steps()); INEKernel::configure(window); } template <typename T> diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp index cd1c4b28cc..4b2352f4c2 100644 --- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp +++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp @@ -62,17 +62,18 @@ inline Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *i if(output->total_size() > 0) { - if(is_data_type_quantized(output->data_type())) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, output); - } - const TensorShape &out_shape = TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape()); ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output->tensor_shape(), 0), "Wrong shape for output"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::U8 && (input1->data_type() != DataType::U8 || input2->data_type() != DataType::U8), "Output can only be U8 if both inputs are U8"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::QASYMM8 && (input1->data_type() != DataType::QASYMM8 || input2->data_type() != DataType::QASYMM8), + "Output can only be QASYMM8 if both inputs are QASYMM8"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::QASYMM8_SIGNED && (input1->data_type() != DataType::QASYMM8_SIGNED || input2->data_type() != DataType::QASYMM8_SIGNED), + "Output can only be QASYMM8_SIGNED if both inputs are QASYMM8_SIGNED"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::QSYMM16 && (input1->data_type() != DataType::QSYMM16 || input2->data_type() != DataType::QSYMM16), + "Output can only be QSYMM16 if both inputs are QSYMM16"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::S32 && (input1->data_type() != DataType::QSYMM16 || input2->data_type() != DataType::QSYMM16), "Output can only be S32 if both inputs are QSYMM16"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::S32 && scale != 1.f, "Unsupported scale for QSYMM16 inputs and S32 output"); diff --git a/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp b/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp index d830d0db67..a1180d5e61 100644 --- a/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp @@ -29,7 +29,6 @@ #include "arm_compute/core/Validate.h" #include -#include namespace arm_compute { @@ -68,6 +67,7 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, if(output != nullptr && output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != 2); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, output); } return Status{}; diff --git a/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp index 00c3f98334..5cf2bd288c 100644 --- a/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp +++
b/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp @@ -23,18 +23,13 @@ */ #include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CPP/Validate.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "support/ToolchainSupport.h" #include -#include namespace arm_compute { @@ -53,7 +48,7 @@ void NEROIPoolingLayerKernel::configure(const ITensor *input, const ITensor *roi ARM_COMPUTE_ERROR_ON(rois->info()->dimension(0) != 5); ARM_COMPUTE_ERROR_ON(rois->info()->num_dimensions() > 2); ARM_COMPUTE_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); ARM_COMPUTE_ERROR_ON((pool_info.pooled_width() == 0) || (pool_info.pooled_height() == 0)); if(output->info()->total_size() != 0) diff --git a/src/core/NEON/kernels/NEReverseKernel.cpp b/src/core/NEON/kernels/NEReverseKernel.cpp index 5a8c446ddd..8c3c59559f 100644 --- a/src/core/NEON/kernels/NEReverseKernel.cpp +++ b/src/core/NEON/kernels/NEReverseKernel.cpp @@ -23,24 +23,11 @@ */ #include "arm_compute/core/NEON/kernels/NEReverseKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CPP/Validate.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/NEAsymm.h" -#include "arm_compute/core/NEON/NEFixedPoint.h" -#include "arm_compute/core/NEON/NEMath.h" #include "arm_compute/core/NEON/wrapper/wrapper.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" -#include -#include -#include -#include - namespace arm_compute { namespace @@ -48,7 +35,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output, axis); - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); + //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. 
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(axis, 1, DataType::U32); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis->num_dimensions() > 1, "Axis must be a 1D tensor"); @@ -159,28 +146,19 @@ void NEReverseKernel::run(const Window &window, const ThreadInfo &info) ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - switch(_input->info()->data_type()) + switch(_input->info()->element_size()) { - case DataType::F32: - case DataType::U32: - case DataType::S32: + case 4: run_reverse<uint32_t>(window, _input, _axis, _output); break; -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::F16: -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::S16: - case DataType::U16: + case 2: run_reverse<uint16_t>(window, _input, _axis, _output); break; - case DataType::QASYMM8: - case DataType::QASYMM8_SIGNED: - case DataType::U8: - case DataType::S8: + case 1: run_reverse<uint8_t>(window, _input, _axis, _output); break; default: - ARM_COMPUTE_ERROR("Data type not supported"); + ARM_COMPUTE_ERROR("Element size not supported"); } } } // namespace arm_compute \ No newline at end of file diff --git a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp index d376d53081..f271f57f19 100644 --- a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp +++ b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp @@ -23,15 +23,11 @@ */ #include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" -#include "arm_compute/core/Dimensions.h" -#include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" -using namespace arm_compute; - +namespace arm_compute +{ namespace { TensorShape get_output_shape(const ITensorInfo *input, bool has_bias) @@ -48,11 +44,9 @@ TensorShape get_output_shape(const ITensorInfo *input, bool has_bias) Status validate_arguments(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output) { + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, - DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, - DataType::BFLOAT16, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); if(biases != nullptr) { @@ -179,3 +173,4 @@ void NEWeightsReshapeKernel::run(const Window &window, const ThreadInfo &info) }, in); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NERNNLayer.cpp b/src/runtime/NEON/functions/NERNNLayer.cpp index 19b84e7fb8..5385192f16 100644 --- a/src/runtime/NEON/functions/NERNNLayer.cpp +++ b/src/runtime/NEON/functions/NERNNLayer.cpp @@ -43,6 +43,7 @@ Status NERNNLayer::validate(const ITensorInfo *input, const ITensorInfo *weights const ITensorInfo *output, const ActivationLayerInfo &info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state, output); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::F16, DataType::F32); const int idx_width = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH); const int idx_height = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT); -- cgit v1.2.1
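Note for reviewers: the NEPixelWiseMultiplicationKernel hunk replaces a blanket mismatching-types check with per-type rules. A minimal standalone sketch of the constraint those new ARM_COMPUTE_RETURN_ERROR_ON_MSG lines encode follows; this is hypothetical illustration code, not ComputeLibrary code, and the name mul_output_type_consistent is invented. The rule: each quantized (or U8) output type is valid only when both inputs already carry exactly that type, and S32 output is reserved for the QSYMM16 * QSYMM16 path.

#include <initializer_list>

enum class DataType
{
    U8, S16, F16, F32, S32, QASYMM8, QASYMM8_SIGNED, QSYMM16
};

bool mul_output_type_consistent(DataType in1, DataType in2, DataType out)
{
    // U8 and each quantized output type require both inputs to match it exactly.
    for(DataType dt : { DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM16 })
    {
        if(out == dt && (in1 != dt || in2 != dt))
        {
            return false;
        }
    }
    // S32 output is only produced by the QSYMM16 * QSYMM16 path.
    if(out == DataType::S32 && (in1 != DataType::QSYMM16 || in2 != DataType::QSYMM16))
    {
        return false;
    }
    return true;
}

Under these assumed semantics, mul_output_type_consistent(QSYMM16, QSYMM16, S32) is the only combination that reaches an S32 output, which matches the retained "Output can only be S32 if both inputs are QSYMM16" check.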
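The NEReverseKernel hunk replaces per-data-type dispatch with per-element-size dispatch: reversal never interprets element values, so one template instantiation per element width (1, 2 or 4 bytes) covers every data type of that width, which is why run() and its error message now key on element_size() rather than data_type(). A hedged sketch of the pattern, with invented names (reverse_buffer, reverse_dispatch) standing in for the kernel's run_reverse paths:

#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <utility>

// Reverse n elements of width sizeof(T); a toy stand-in for run_reverse<T>.
template <typename T>
void reverse_buffer(T *data, std::size_t n)
{
    for(std::size_t i = 0, j = n; i + 1 < j; ++i, --j)
    {
        std::swap(data[i], data[j - 1]);
    }
}

// Dispatch on element size alone: F32/U32/S32 share the 4-byte path,
// F16/S16/U16 the 2-byte path, QASYMM8/QASYMM8_SIGNED/U8/S8 the 1-byte path.
void reverse_dispatch(void *data, std::size_t n, std::size_t element_size)
{
    switch(element_size)
    {
        case 4:
            reverse_buffer(static_cast<std::uint32_t *>(data), n);
            break;
        case 2:
            reverse_buffer(static_cast<std::uint16_t *>(data), n);
            break;
        case 1:
            reverse_buffer(static_cast<std::uint8_t *>(data), n);
            break;
        default:
            throw std::runtime_error("Element size not supported");
    }
}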