diff options
author | Sheri Zhang <sheri.zhang@arm.com> | 2021-05-04 14:03:13 +0100 |
---|---|---|
committer | Sheri Zhang <sheri.zhang@arm.com> | 2021-05-05 09:38:50 +0000 |
commit | 6124ce60b54eb5639ed19d46c79fce21cca2c83b (patch) | |
tree | 9754cfdd6f1fed0eaa6866e04c1d4e4a57004810 /arm_compute/runtime | |
parent | 0ec58215bf5322d370dbc7c0a7f3ced05af2174f (diff) | |
download | ComputeLibrary-6124ce60b54eb5639ed19d46c79fce21cca2c83b.tar.gz |
Update operator list part3
Partially resolve: COMPMID-4199
Signed-off-by: Sheri Zhang <sheri.zhang@arm.com>
Change-Id: Id24702d258fb4e04ad948e7cf6c0efd98d2a5456
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5561
Reviewed-by: TeresaARM <teresa.charlinreyes@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/runtime')
23 files changed, 466 insertions, 61 deletions
diff --git a/arm_compute/runtime/CL/functions/CLComparison.h b/arm_compute/runtime/CL/functions/CLComparison.h index 8cc3e96ec5..3f984900ee 100644 --- a/arm_compute/runtime/CL/functions/CLComparison.h +++ b/arm_compute/runtime/CL/functions/CLComparison.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -40,6 +40,14 @@ class CLComparison : public ICLSimpleFunction public: /** Initialise the kernel's inputs and outputs. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------|:--------|:--------| + * |All |All |U8 | + * * @param[in] input1 Source tensor. Data types supported: All. * The input1 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. * @param[in] input2 Source tensor. Data types supported: Same as @p input1. diff --git a/arm_compute/runtime/CL/functions/CLCrop.h b/arm_compute/runtime/CL/functions/CLCrop.h index dc509b5b84..d2b72a5eff 100644 --- a/arm_compute/runtime/CL/functions/CLCrop.h +++ b/arm_compute/runtime/CL/functions/CLCrop.h @@ -55,6 +55,14 @@ public: * * @note Supported tensor rank: up to 4 * + * Valid data layouts: + * - NHWC + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |F32 | + * * @param[in] input Source tensor. Data type supported: All. Data layouts supported: NHWC. * @param[out] output Destination tensor. Data type supported: F32 * @param[in] start Coordinates of where to start cropping the image. 
diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h index 6c1302fbf7..344ebd0afb 100644 --- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h +++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h @@ -61,6 +61,15 @@ public: /** Initialize the function's source, destination, interpolation type and border_mode. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @param[in, out] input Source tensor. Data type supported: All. * @param[out] output Destination tensor. Data type supported: same as @p input. * @param[in] info Contains padding and policies to be used in the deconvolution. diff --git a/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h index a23500e16b..567de13508 100644 --- a/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h @@ -87,6 +87,20 @@ public: CLDirectDeconvolutionLayer &operator=(CLDirectDeconvolutionLayer &&) = default; /** Set the input, weights, biases and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | + * * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. 
* @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input or QSYMM8_PER_CHANNEL if @p input is QASYMM8/QASYMM8_SIGNED. diff --git a/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h b/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h index fd6942cad5..79b79e89de 100644 --- a/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h +++ b/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h @@ -54,6 +54,15 @@ public: CLRsqrtLayer &operator=(CLRsqrtLayer &&); /** Initialize the function * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | + * * @param[in] input Input tensor. Data types supported: F16/F32. * @param[out] output Output tensor. Data types supported: same as @p input. */ @@ -100,6 +109,15 @@ public: CLExpLayer &operator=(CLExpLayer &&); /** Initialize the function * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | + * * @param[in] input Input tensor. Data types supported: F16/F32. * @param[out] output Output tensor. Data types supported: same as @p input. */ @@ -146,6 +164,15 @@ public: CLNegLayer &operator=(CLNegLayer &&); /** Initialize the function * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | + * * @param[in] input Input tensor. Data types supported: F16/F32. * @param[out] output Output tensor. Data types supported: same as @p input. */ @@ -192,6 +219,15 @@ public: CLSinLayer &operator=(CLSinLayer &&); /** Initialize the function * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | + * * @param[in] input Input tensor. 
Data types supported: F16/F32. * @param[out] output Output tensor. Data types supported: same as @p input. */ @@ -238,6 +274,15 @@ public: CLLogLayer &operator=(CLLogLayer &&); /** Initialize the function * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | + * * @param[in] input Input tensor. Data types supported: F16/F32. * @param[out] output Output tensor. Data types supported: same as @p input. */ @@ -284,6 +329,15 @@ public: CLAbsLayer &operator=(CLAbsLayer &&); /** Initialize the function * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | + * * @param[in] input Input tensor. Data types supported: F16/F32. * @param[out] output Output tensor. Data types supported: same as @p input. */ @@ -330,6 +384,15 @@ public: CLRoundLayer &operator=(CLRoundLayer &&); /** Initialize the function * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | + * * @param[in] input Input tensor. Data types supported: F16/F32. * @param[out] output Output tensor. Data types supported: same as @p input. */ diff --git a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h index 2b291517f3..555e84a251 100644 --- a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h +++ b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h @@ -55,19 +55,23 @@ public: CLArithmeticAddition &operator=(CLArithmeticAddition &&); /** Initialise the kernel's inputs, output and conversion policy. 
* - * Valid configurations (Input1,Input2) -> Output : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (S32,S32) -> S32 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - (QASYMM8,QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (QSYMM16,QSYMM16) -> QSYMM16 + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 |QSYMM16 | + * |U8 |U8 |U8 | + * |U8 |U8 |S16 | + * |U8 |S16 |S16 | + * |S16 |U8 |S16 | + * |S16 |S16 |S16 | + * |S32 |S32 |S32 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | * * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. @@ -161,19 +165,23 @@ public: CLArithmeticSubtraction &operator=(CLArithmeticSubtraction &&); /** Initialise the kernel's inputs, output and conversion policy. 
* - * Valid configurations (Input1,Input2) -> Output : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (S32,S32) -> S32 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - (QASYMM8,QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (QSYMM16,QSYMM16) -> QSYMM16 + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 |QSYMM16 | + * |U8 |U8 |U8 | + * |U8 |U8 |S16 | + * |U8 |S16 |S16 | + * |S16 |U8 |S16 | + * |S16 |S16 |S16 | + * |S32 |S32 |S32 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | * * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. @@ -267,6 +275,15 @@ public: CLArithmeticDivision &operator=(CLArithmeticDivision &&); /** Initialise the kernel's inputs, output. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in, out] input1 First tensor input. Data types supported: F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. * @param[in, out] input2 Second tensor input. Same as @p input1. @@ -327,6 +344,22 @@ public: CLElementwiseMax &operator=(CLElementwiseMax &&); /** Initialise the kernel's inputs, output and conversion policy. 
* + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 |QSYMM16 | + * |U8 |U8 |U8 | + * |S16 |S16 |S16 | + * |S32 |S32 |S32 | + * |U32 |U32 |U32 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1. @@ -387,6 +420,22 @@ public: CLElementwiseMin &operator=(CLElementwiseMin &&); /** Initialise the kernel's inputs, output and conversion policy. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 |QSYMM16 | + * |U8 |U8 |U8 | + * |S16 |S16 |S16 | + * |S32 |S32 |S32 | + * |U32 |U32 |U32 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1. @@ -447,6 +496,20 @@ public: CLElementwiseSquaredDiff &operator=(CLElementwiseSquaredDiff &&); /** Initialise the kernel's inputs, output and conversion policy. 
* + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 |QSYMM16 | + * |U8 |U8 |U8 | + * |S16 |S16 |S16 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1. @@ -507,6 +570,15 @@ public: CLElementwisePower &operator=(CLElementwisePower &&); /** Initialise the kernel's inputs, output and conversion policy. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in, out] input1 First tensor input. Data types supported: F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. * @param[in, out] input2 Second tensor input. Data types supported: F16/F32. diff --git a/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h index 32af0f9427..6e482c98e7 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -93,6 +93,17 @@ public: ~CLGEMMDeconvolutionLayer(); /** Set the input, weights, biases and output tensors. 
* + * Valid data layouts: + * - NHWC + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:--------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. Data layout supported: NHWC * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input. Data layout supported: same as @p input. diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h index 0f051ecffd..a60992a0f4 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h +++ b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h @@ -288,10 +288,20 @@ class CLGEMMLowpOutputStage : public ICLSimpleFunction public: /** Initialise the kernel's inputs, output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:-------------|:-------------| + * |S32 |S32 |QASYMM8 | + * |S32 |S32 |QASYMM8_SIGNED| + * |S32 |S32 |QSYMM16 | + * * @param[in] input Input tensor. Data type supported: S32 * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM16 * @param[in] info GEMMLowp output stage metadata. 
*/ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info); diff --git a/arm_compute/runtime/CL/functions/CLLogicalAnd.h b/arm_compute/runtime/CL/functions/CLLogicalAnd.h index f7038ee97a..61a15816eb 100644 --- a/arm_compute/runtime/CL/functions/CLLogicalAnd.h +++ b/arm_compute/runtime/CL/functions/CLLogicalAnd.h @@ -87,6 +87,14 @@ public: CLLogicalAnd &operator=(CLLogicalAnd &&); /** Initialize the function * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:-------------|:------------| + * |U8 |U8 |U8 | + * * @param[in] input1 Input tensor. Data types supported: U8. * @param[in] input2 Input tensor. Data types supported: same as @p input1. * @param[out] output Output tensor. Data types supported: same as @p input1. diff --git a/arm_compute/runtime/CL/functions/CLLogicalNot.h b/arm_compute/runtime/CL/functions/CLLogicalNot.h index 772f16b942..27fd0f9c9f 100644 --- a/arm_compute/runtime/CL/functions/CLLogicalNot.h +++ b/arm_compute/runtime/CL/functions/CLLogicalNot.h @@ -58,6 +58,14 @@ public: CLLogicalNot &operator=(CLLogicalNot &&); /** Initialize the function * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:------------| + * |U8 |U8 | + * * @param[in] input Input tensor. Data types supported: U8. * @param[out] output Output tensor. Data types supported: same as @p input. 
*/ diff --git a/arm_compute/runtime/CL/functions/CLLogicalOr.h b/arm_compute/runtime/CL/functions/CLLogicalOr.h index 948baee9d9..b9ffb4a449 100644 --- a/arm_compute/runtime/CL/functions/CLLogicalOr.h +++ b/arm_compute/runtime/CL/functions/CLLogicalOr.h @@ -87,6 +87,14 @@ public: CLLogicalOr &operator=(CLLogicalOr &&); /** Initialize the function * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:-------------|:------------| + * |U8 |U8 |U8 | + * * @param[in] input1 Input tensor. Data types supported: U8. * @param[in] input2 Input tensor. Data types supported: same as @p input1. * @param[out] output Output tensor. Data types supported: same as @p input1. diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h index ddb35ae56f..721a47144e 100644 --- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h +++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h @@ -60,6 +60,17 @@ public: ~CLSoftmaxLayerGeneric(); /** Set the input and output tensors. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax * @param[out] output Destination tensor. Data types supported: same as @p input * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f diff --git a/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h b/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h index 8cd809cc1f..d644591b57 100644 --- a/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h +++ b/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. 
+ * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -41,6 +41,16 @@ class CLWinogradInputTransform : public ICLSimpleFunction public: /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | + * * @note Winograd input transform supports the following configurations for NCHW data layout * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h index 734e3502dd..b8e46ff36e 100644 --- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h +++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h @@ -52,8 +52,7 @@ public: /** Initialise the kernel's inputs, output and conversion policy. * * Valid data layouts: - * - NHWC - * - NCHW + * - All * * Valid data type configurations: * |src0 |src1 |dst | diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h index c741db3223..0c72e946f6 100644 --- a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h +++ b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h @@ -57,6 +57,25 @@ public: NEArithmeticSubtraction &operator=(NEArithmeticSubtraction &&); /** Initialise the kernel's inputs, output and conversion policy. 
* + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 |QSYMM16 | + * |QSYMM16 |QSYMM16 |S32 | + * |U8 |U8 |U8 | + * |U8 |U8 |S16 | + * |U8 |S16 |S16 | + * |S16 |U8 |S16 | + * |S16 |S16 |S16 | + * |S32 |S32 |S32 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 diff --git a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h index d5c1f0ab6f..2affa8d49e 100644 --- a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -57,6 +57,16 @@ public: ~NEDetectionPostProcessLayer() = default; /** Configure the detection output layer NE function * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 - src2 |dst0 - dst3 | + * |:--------------|:--------------| + * |QASYMM8 |F32 | + * |QASYMM8_SIGNED |F32 | + * |F32 |F32 | + * * @param[in] input_box_encoding The bounding box input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32. * @param[in] input_score The class prediction input tensor. Data types supported: same as @p input_box_encoding. * @param[in] input_anchors The anchors input tensor. Data types supported: same as @p input_box_encoding. 
diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h index 44b70bbe85..95274bdb0c 100644 --- a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h +++ b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h @@ -54,6 +54,19 @@ public: NEElementwiseMax &operator=(NEElementwiseMax &&); /** Initialise the kernel's inputs, output and conversion policy. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |S32 |S32 |S32 | + * |S16 |S16 |S16 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. * @param[out] output Output tensor. Data types supported: Same as @p input1. @@ -101,6 +114,19 @@ public: NEElementwiseMin &operator=(NEElementwiseMin &&); /** Initialise the kernel's inputs, output and conversion policy. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |S32 |S32 |S32 | + * |S16 |S16 |S16 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. * @param[out] output Output tensor. Data types supported: Same as @p input1. @@ -148,6 +174,19 @@ public: NEElementwiseSquaredDiff &operator=(NEElementwiseSquaredDiff &&); /** Initialise the kernel's inputs, output and conversion policy. 
* + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |S32 |S32 |S32 | + * |S16 |S16 |S16 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. * @param[out] output Output tensor. Data types supported: Same as @p input1. @@ -195,6 +234,15 @@ public: NEElementwiseDivision &operator=(NEElementwiseDivision &&); /** Initialise the kernel's inputs, output and conversion policy. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in, out] input1 First tensor input. Data types supported: F16/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. * @param[out] output Output tensor. Data types supported: Same as @p input1. @@ -243,6 +291,15 @@ public: NEElementwisePower &operator=(NEElementwisePower &&); /** Initialise the kernel's inputs, output and conversion policy. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in, out] input1 First tensor input. Data types supported: F16/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. * @param[out] output Output tensor. Data types supported: Same as @p input1. @@ -290,6 +347,20 @@ public: NEElementwiseComparison &operator=(NEElementwiseComparison &&); /** Initialise the kernel's inputs, output and conversion policy. 
* + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:-----| + * |QASYMM8 |QASYMM8 |U8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |U8 | + * |S32 |S32 |U8 | + * |U8 |U8 |U8 | + * |S16 |S16 |U8 | + * |F16 |F16 |U8 | + * |F32 |F32 |U8 | + * * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. * @param[out] output Output tensor. Data types supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h index 4786f71cf8..63e47b8377 100644 --- a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h +++ b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h @@ -54,6 +54,16 @@ public: /** Initialize the function * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | + * |S32 |S32 | + * * @param[in] input Input tensor. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations. * @param[out] output Output tensor. Data types supported: Same as @p input. */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h index 8c3ba4f0c8..b2ffd038de 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h @@ -65,6 +65,18 @@ public: ~NEGEMMConv2d(); /** Set the input and output tensors. 
* + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 | + * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h index c22ed1b5c4..fa5f5e3826 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h @@ -282,6 +282,16 @@ public: ~NEGEMMLowpOutputStage(); /** Initialise the kernel's inputs, output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:-------------|:-------------| + * |S32 |S32 |QASYMM8 | + * |S32 |S32 |QASYMM8_SIGNED| + * |S32 |S32 |QSYMM16 | + * * @param[in] input Input tensor. Data type supported: S32 * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. diff --git a/arm_compute/runtime/NEON/functions/NELogical.h b/arm_compute/runtime/NEON/functions/NELogical.h index 04ffce6221..5cf5336f4f 100644 --- a/arm_compute/runtime/NEON/functions/NELogical.h +++ b/arm_compute/runtime/NEON/functions/NELogical.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -48,6 +48,14 @@ public: /** Initialise the kernel's inputs and output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:-------------|:------------| + * |U8 |U8 |U8 | + * * @param[in] input1 First tensor input. Data type supported: U8. * @param[in] input2 Second tensor input. Data type supported: U8. * @param[out] output Output tensor. Data type supported: U8. @@ -83,6 +91,14 @@ public: /** Initialise the kernel's inputs and output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:-------------|:------------| + * |U8 |U8 |U8 | + * * @param[in] input1 First tensor input. Data type supported: U8. * @param[in] input2 Second tensor input. Data type supported: U8. * @param[out] output Output tensor. Data type supported: U8. @@ -118,6 +134,14 @@ public: /** Initialise the kernel's inputs and output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:-------------| + * |U8 |U8 | + * * @param[in] input Input tensor. Data type supported: U8. * @param[out] output Output tensor. Data type supported: U8. */ diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h index 8a2ae10129..efe959f14e 100644 --- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h +++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h @@ -52,6 +52,17 @@ public: ~NESoftmaxLayerGeneric(); /** Set the input and output tensors. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * * @param[in,out] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. 
If the width is not a * multiple of the internal processing block size, @ref NEFillBorder replicates the * last value of each row to the nearest multiple. diff --git a/arm_compute/runtime/OperatorList.h b/arm_compute/runtime/OperatorList.h index a659a79423..e1c2bed41d 100644 --- a/arm_compute/runtime/OperatorList.h +++ b/arm_compute/runtime/OperatorList.h @@ -51,7 +51,7 @@ * */ -/** ArithmeticAddition (no CL) +/** ArithmeticAddition * * Description: * Function to add 2 tensors. @@ -61,7 +61,7 @@ * */ -/** ArithmeticSubtraction (no CL) +/** ArithmeticSubtraction * * Description: * Function to subtract 2 tensors. @@ -161,7 +161,7 @@ * */ -/** Comparison (only CL) +/** Comparison * * Description: * Function to compare 2 tensors. @@ -216,7 +216,7 @@ * */ -/** Crop (only CL) +/** Crop * * Description: * Performs a copy of input tensor to the output tensor. @@ -246,7 +246,7 @@ * */ -/** DeconvolutionLayerUpsample (only CL) +/** DeconvolutionLayerUpsample * * Description: * Function to execute deconvolution upsample on OpenCL. @@ -296,7 +296,7 @@ * */ -/** DetectionPostProcessLayer (no CL) +/** DetectionPostProcessLayer * * Description: * Function to generate the detection output based on center size encoded boxes, class prediction and anchors by doing non maximum suppression (NMS). @@ -316,7 +316,7 @@ * */ -/** DirectDeconvolutionLayer (only CL) +/** DirectDeconvolutionLayer * * Description: * Function to run the deconvolution layer. 
@@ -326,7 +326,7 @@ * */ -/** ElementWiseOperations (skip) +/** ElementWiseOperations * * Description: * Function to perform in Cpu: @@ -336,7 +336,6 @@ * - Pow * - SquaredDiff * - Comparisons (Equal, greater, greater_equal, less, less_equal, not_equal) - * * Function to perform in CL: * - Add * - Sub @@ -351,18 +350,18 @@ * ANEURALNETWORKS_MINIMUM * ANEURALNETWORKS_POW * ANEURALNETWORKS_DIV - * ANEURALNETWORKS_ADD (only CL) - * ANEURALNETWORKS_SUB (only CL) - * ANEURALNETWORKS_EQUAL (no CL) - * ANEURALNETWORKS_GREATER (no CL) - * ANEURALNETWORKS_GREATER_EQUAL (no CL) - * ANEURALNETWORKS_LESS (no CL) - * ANEURALNETWORKS_LESS_EQUAL (no CL) - * ANEURALNETWORKS_NOT_EQUAL (no CL) + * ANEURALNETWORKS_ADD + * ANEURALNETWORKS_SUB + * ANEURALNETWORKS_EQUAL + * ANEURALNETWORKS_GREATER + * ANEURALNETWORKS_GREATER_EQUAL + * ANEURALNETWORKS_LESS + * ANEURALNETWORKS_LESS_EQUAL + * ANEURALNETWORKS_NOT_EQUAL * */ -/** ElementWiseOperationUnary (skip) +/** ElementwiseUnaryLayer * * Description: * Function to perform: @@ -494,7 +493,7 @@ * */ -/** GEMMConv2D (no CL) +/** GEMMConv2D * * Description: * General Matrix Multiplication. @@ -514,7 +513,7 @@ * */ -/** GEMMDeconvolutionLayer (only CL) +/** GEMMDeconvolutionLayer * * Description: * General Matrix Multiplication. @@ -574,7 +573,7 @@ * */ -/** Logical (no CL) +/** Logical * * Description: * Function to perform: @@ -587,7 +586,7 @@ * */ -/** LogicalAnd (only CL) +/** LogicalAnd * * Description: * Function to perform Logical AND. @@ -597,7 +596,7 @@ * */ -/** LogicalOr (only CL) +/** LogicalOr * * Description: * Function to perform Logical OR. @@ -607,7 +606,7 @@ * */ -/** LogicalNot (only CL) +/** LogicalNot * * Description: * Function to perform Logical NOT. @@ -724,7 +723,7 @@ /** PriorBoxLayer * * Description: - * Function to . + * Function to compute prior boxes and clip. 
* * Equivalent Android NNAPI Op: * n/a @@ -889,7 +888,7 @@ * */ -/** SoftmaxLayer (skip) +/** SoftmaxLayer * * Description: * Function to compute a SoftmaxLayer and a Log SoftmaxLayer. @@ -990,7 +989,7 @@ * */ -/** WinogradInputTransform (only CL) +/** WinogradInputTransform * * Description: * Function to. |