From 6124ce60b54eb5639ed19d46c79fce21cca2c83b Mon Sep 17 00:00:00 2001 From: Sheri Zhang Date: Tue, 4 May 2021 14:03:13 +0100 Subject: Update operator list part3 Partially resolve: COMPMID-4199 Signed-off-by: Sheri Zhang Change-Id: Id24702d258fb4e04ad948e7cf6c0efd98d2a5456 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5561 Reviewed-by: TeresaARM Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- arm_compute/runtime/CL/functions/CLComparison.h | 10 +- arm_compute/runtime/CL/functions/CLCrop.h | 8 + .../CL/functions/CLDeconvolutionLayerUpsample.h | 9 + .../CL/functions/CLDirectDeconvolutionLayer.h | 14 + .../runtime/CL/functions/CLElementWiseUnaryLayer.h | 63 ++ .../runtime/CL/functions/CLElementwiseOperations.h | 124 +++- .../CL/functions/CLGEMMDeconvolutionLayer.h | 13 +- .../runtime/CL/functions/CLGEMMLowpOutputStage.h | 12 +- arm_compute/runtime/CL/functions/CLLogicalAnd.h | 8 + arm_compute/runtime/CL/functions/CLLogicalNot.h | 8 + arm_compute/runtime/CL/functions/CLLogicalOr.h | 8 + arm_compute/runtime/CL/functions/CLSoftmaxLayer.h | 11 + .../CL/functions/CLWinogradInputTransform.h | 12 +- .../runtime/NEON/functions/NEArithmeticAddition.h | 3 +- .../NEON/functions/NEArithmeticSubtraction.h | 19 + .../NEON/functions/NEDetectionPostProcessLayer.h | 12 +- .../NEON/functions/NEElementwiseOperations.h | 71 ++ .../NEON/functions/NEElementwiseUnaryLayer.h | 10 + arm_compute/runtime/NEON/functions/NEGEMMConv2d.h | 12 + .../runtime/NEON/functions/NEGEMMLowpOutputStage.h | 10 + arm_compute/runtime/NEON/functions/NELogical.h | 26 +- .../runtime/NEON/functions/NESoftmaxLayer.h | 11 + arm_compute/runtime/OperatorList.h | 53 +- docs/09_operators_list.dox | 825 +++++++++++++++++++-- 24 files changed, 1232 insertions(+), 120 deletions(-) diff --git a/arm_compute/runtime/CL/functions/CLComparison.h b/arm_compute/runtime/CL/functions/CLComparison.h index 8cc3e96ec5..3f984900ee 100644 --- a/arm_compute/runtime/CL/functions/CLComparison.h +++ b/arm_compute/runtime/CL/functions/CLComparison.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -39,6 +39,14 @@ class CLComparison : public ICLSimpleFunction { public: /** Initialise the kernel's inputs and outputs. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------|:--------|:--------| + * |All |All |U8 | * * @param[in] input1 Source tensor. Data types supported: All. * The input1 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. diff --git a/arm_compute/runtime/CL/functions/CLCrop.h b/arm_compute/runtime/CL/functions/CLCrop.h index dc509b5b84..d2b72a5eff 100644 --- a/arm_compute/runtime/CL/functions/CLCrop.h +++ b/arm_compute/runtime/CL/functions/CLCrop.h @@ -55,6 +55,14 @@ public: * * @note Supported tensor rank: up to 4 * + * Valid data layouts: + * - NHWC + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |F32 | + * * @param[in] input Source tensor. Data type supported: All. Data layouts supported: NHWC. * @param[out] output Destination tensor. Data type supported: F32 * @param[in] start Coordinates of where to start cropping the image. 
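As a usage illustration of the All/All -> U8 contract documented for CLComparison above, here is a minimal sketch; the shapes, fill values and the choice of ComparisonOperation::Greater are illustrative and not part of this patch:

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLFunctions.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    int main()
    {
        // Create the default OpenCL context and queue used by the CL functions.
        CLScheduler::get().default_init();

        // Inputs may be any data type; the comparison output is always U8.
        CLTensor lhs, rhs, result;
        lhs.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
        rhs.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
        result.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::U8));

        CLComparison greater;
        greater.configure(&lhs, &rhs, &result, ComparisonOperation::Greater);

        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        result.allocator()->allocate();
        // ... map the tensors and fill lhs/rhs here ...

        greater.run();
        CLScheduler::get().sync(); // wait for the OpenCL queue to finish
        return 0;
    }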
diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h index 6c1302fbf7..344ebd0afb 100644 --- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h +++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h @@ -60,6 +60,15 @@ public: ~CLDeconvolutionLayerUpsample(); /** Initialize the function's source, destination, interpolation type and border_mode. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | * * @param[in, out] input Source tensor. Data type supported: All. * @param[out] output Destination tensor. Data type supported: same as @p input. diff --git a/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h index a23500e16b..567de13508 100644 --- a/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h @@ -86,6 +86,20 @@ public: /** Default move assignment operator */ CLDirectDeconvolutionLayer &operator=(CLDirectDeconvolutionLayer &&) = default; /** Set the input, weights, biases and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | * * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. diff --git a/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h b/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h index fd6942cad5..79b79e89de 100644 --- a/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h +++ b/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h @@ -53,6 +53,15 @@ public: /** Default move assignment operator */ CLRsqrtLayer &operator=(CLRsqrtLayer &&); /** Initialize the function + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | * * @param[in] input Input tensor. Data types supported: F16/F32. * @param[out] output Output tensor. Data types supported: same as @p input. @@ -99,6 +108,15 @@ public: /** Default move assignment operator */ CLExpLayer &operator=(CLExpLayer &&); /** Initialize the function + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | * * @param[in] input Input tensor. Data types supported: F16/F32. * @param[out] output Output tensor. Data types supported: same as @p input. @@ -145,6 +163,15 @@ public: /** Default move assignment operator */ CLNegLayer &operator=(CLNegLayer &&); /** Initialize the function + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | * * @param[in] input Input tensor. Data types supported: F16/F32. * @param[out] output Output tensor. Data types supported: same as @p input. 
@@ -191,6 +218,15 @@ public: /** Default move assignment operator */ CLSinLayer &operator=(CLSinLayer &&); /** Initialize the function + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | * * @param[in] input Input tensor. Data types supported: F16/F32. * @param[out] output Output tensor. Data types supported: same as @p input. @@ -237,6 +273,15 @@ public: /** Default move assignment operator */ CLLogLayer &operator=(CLLogLayer &&); /** Initialize the function + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | * * @param[in] input Input tensor. Data types supported: F16/F32. * @param[out] output Output tensor. Data types supported: same as @p input. @@ -283,6 +328,15 @@ public: /** Default move assignment operator */ CLAbsLayer &operator=(CLAbsLayer &&); /** Initialize the function + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | * * @param[in] input Input tensor. Data types supported: F16/F32. * @param[out] output Output tensor. Data types supported: same as @p input. @@ -329,6 +383,15 @@ public: /** Default move assignment operator */ CLRoundLayer &operator=(CLRoundLayer &&); /** Initialize the function + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | * * @param[in] input Input tensor. Data types supported: F16/F32. * @param[out] output Output tensor. Data types supported: same as @p input. diff --git a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h index 2b291517f3..555e84a251 100644 --- a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h +++ b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h @@ -55,19 +55,23 @@ public: CLArithmeticAddition &operator=(CLArithmeticAddition &&); /** Initialise the kernel's inputs, output and conversion policy. * - * Valid configurations (Input1,Input2) -> Output : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (S32,S32) -> S32 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - (QASYMM8,QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (QSYMM16,QSYMM16) -> QSYMM16 + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 |QASYMM16 | + * |U8 |U8 |U8 | + * |U8 |U8 |S16 | + * |U8 |S16 |S16 | + * |S16 |U8 |S16 | + * |S16 |S16 |S16 | + * |S32 |S32 |S32 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | * * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. @@ -161,19 +165,23 @@ public: CLArithmeticSubtraction &operator=(CLArithmeticSubtraction &&); /** Initialise the kernel's inputs, output and conversion policy. 
* - * Valid configurations (Input1,Input2) -> Output : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (S32,S32) -> S32 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - (QASYMM8,QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (QSYMM16,QSYMM16) -> QSYMM16 + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 |QASYMM16 | + * |U8 |U8 |U8 | + * |U8 |U8 |S16 | + * |U8 |S16 |S16 | + * |S16 |U8 |S16 | + * |S16 |S16 |S16 | + * |S32 |S32 |S32 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | * * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. @@ -266,6 +274,15 @@ public: /** Default move assignment operator */ CLArithmeticDivision &operator=(CLArithmeticDivision &&); /** Initialise the kernel's inputs, output. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | * * @param[in, out] input1 First tensor input. Data types supported: F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. @@ -326,6 +343,22 @@ public: /** Default move assignment operator */ CLElementwiseMax &operator=(CLElementwiseMax &&); /** Initialise the kernel's inputs, output and conversion policy. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 |QASYMM16 | + * |U8 |U8 |U8 | + * |S16 |S16 |S16 | + * |S32 |S32 |S32 | + * |U32 |U32 |U32 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | * * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. @@ -386,6 +419,22 @@ public: /** Default move assignment operator */ CLElementwiseMin &operator=(CLElementwiseMin &&); /** Initialise the kernel's inputs, output and conversion policy. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 |QASYMM16 | + * |U8 |U8 |U8 | + * |S16 |S16 |S16 | + * |S32 |S32 |S32 | + * |U32 |U32 |U32 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | * * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. @@ -446,6 +495,20 @@ public: /** Default move assignment operator */ CLElementwiseSquaredDiff &operator=(CLElementwiseSquaredDiff &&); /** Initialise the kernel's inputs, output and conversion policy. 
+ * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 |QASYMM16 | + * |U8 |U8 |U8 | + * |S16 |S16 |S16 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | * * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. @@ -506,6 +569,15 @@ public: /** Default move assignment operator */ CLElementwisePower &operator=(CLElementwisePower &&); /** Initialise the kernel's inputs, output and conversion policy. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | * * @param[in, out] input1 First tensor input. Data types supported: F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. diff --git a/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h index 32af0f9427..6e482c98e7 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -92,6 +92,17 @@ public: /** Default desctructor */ ~CLGEMMDeconvolutionLayer(); /** Set the input, weights, biases and output tensors. + * + * Valid data layouts: + * - NHWC + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:--------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | * * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. Data layout supported: NHWC diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h index 0f051ecffd..a60992a0f4 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h +++ b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h @@ -287,11 +287,21 @@ class CLGEMMLowpOutputStage : public ICLSimpleFunction { public: /** Initialise the kernel's inputs, output + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:-------------|:-------------| + * |S32 |S32 |QASYMM8 | + * |S32 |S32 |QASYMM8_SIGNED| + * |S32 |S32 |QSYMM16 | * * @param[in] input Input tensor. Data type supported: S32 * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM16 * @param[in] info GEMMLowp output stage metadata. 
*/ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info); diff --git a/arm_compute/runtime/CL/functions/CLLogicalAnd.h b/arm_compute/runtime/CL/functions/CLLogicalAnd.h index f7038ee97a..61a15816eb 100644 --- a/arm_compute/runtime/CL/functions/CLLogicalAnd.h +++ b/arm_compute/runtime/CL/functions/CLLogicalAnd.h @@ -86,6 +86,14 @@ public: /** Default move assignment operator */ CLLogicalAnd &operator=(CLLogicalAnd &&); /** Initialize the function + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:-------------|:------------| + * |U8 |U8 |U8 | * * @param[in] input1 Input tensor. Data types supported: U8. * @param[in] input2 Input tensor. Data types supported: same as @p input1. diff --git a/arm_compute/runtime/CL/functions/CLLogicalNot.h b/arm_compute/runtime/CL/functions/CLLogicalNot.h index 772f16b942..27fd0f9c9f 100644 --- a/arm_compute/runtime/CL/functions/CLLogicalNot.h +++ b/arm_compute/runtime/CL/functions/CLLogicalNot.h @@ -57,6 +57,14 @@ public: /** Default move assignment operator */ CLLogicalNot &operator=(CLLogicalNot &&); /** Initialize the function + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:------------| + * |U8 |U8 | * * @param[in] input Input tensor. Data types supported: U8. * @param[out] output Output tensor. Data types supported: same as @p input. diff --git a/arm_compute/runtime/CL/functions/CLLogicalOr.h b/arm_compute/runtime/CL/functions/CLLogicalOr.h index 948baee9d9..b9ffb4a449 100644 --- a/arm_compute/runtime/CL/functions/CLLogicalOr.h +++ b/arm_compute/runtime/CL/functions/CLLogicalOr.h @@ -86,6 +86,14 @@ public: /** Default move assignment operator */ CLLogicalOr &operator=(CLLogicalOr &&); /** Initialize the function + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:-------------|:------------| + * |U8 |U8 |U8 | * * @param[in] input1 Input tensor. Data types supported: U8. * @param[in] input2 Input tensor. Data types supported: same as @p input1. diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h index ddb35ae56f..721a47144e 100644 --- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h +++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h @@ -59,6 +59,17 @@ public: /** Default destructor */ ~CLSoftmaxLayerGeneric(); /** Set the input and output tensors. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | * * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax * @param[out] output Destination tensor. Data types supported: same as @p input diff --git a/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h b/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h index 8cd809cc1f..d644591b57 100644 --- a/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h +++ b/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -40,6 +40,16 @@ class CLWinogradInputTransform : public ICLSimpleFunction { public: /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | * * @note Winograd input transform supports the following configurations for NCWH data layout * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h index 734e3502dd..b8e46ff36e 100644 --- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h +++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h @@ -52,8 +52,7 @@ public: /** Initialise the kernel's inputs, output and conversion policy. * * Valid data layouts: - * - NHWC - * - NCHW + * - All * * Valid data type configurations: * |src0 |src1 |dst | diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h index c741db3223..0c72e946f6 100644 --- a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h +++ b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h @@ -56,6 +56,25 @@ public: /** Default move assignment operator */ NEArithmeticSubtraction &operator=(NEArithmeticSubtraction &&); /** Initialise the kernel's inputs, output and conversion policy. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 |QASYMM16 | + * |QSYMM16 |QSYMM16 |S32 | + * |U8 |U8 |U8 | + * |U8 |U8 |S16 | + * |U8 |S16 |S16 | + * |S16 |U8 |S16 | + * |S16 |S16 |S16 | + * |S32 |S32 |S32 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | * * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 diff --git a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h index d5c1f0ab6f..2affa8d49e 100644 --- a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -56,6 +56,16 @@ public: /** Default destructor */ ~NEDetectionPostProcessLayer() = default; /** Configure the detection output layer NE function + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 - src2 |dst0 - dst3 | + * |:--------------|:--------------| + * |QASYMM8 |F32 | + * |QASYMM8_SIGNED |F32 | + * |F32 |F32 | * * @param[in] input_box_encoding The bounding box input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32. * @param[in] input_score The class prediction input tensor. Data types supported: same as @p input_box_encoding. 
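To make the QASYMM8 row of the NEArithmeticSubtraction table above concrete, a minimal sketch follows. The quantisation parameters are illustrative, and SATURATE is used because, per the note in the header, WRAP is not valid for the quantized configurations:

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // QASYMM8 - QASYMM8 -> QASYMM8, one of the configurations listed above.
        const QuantizationInfo qinfo(0.25f, 10); // scale/offset chosen for illustration
        Tensor a, b, out;
        a.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::QASYMM8, qinfo));
        b.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::QASYMM8, qinfo));
        out.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::QASYMM8, qinfo));

        NEArithmeticSubtraction sub;
        sub.configure(&a, &b, &out, ConvertPolicy::SATURATE);

        a.allocator()->allocate();
        b.allocator()->allocate();
        out.allocator()->allocate();

        sub.run();
        return 0;
    }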
diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h index 44b70bbe85..95274bdb0c 100644 --- a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h +++ b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h @@ -53,6 +53,19 @@ public: /** Default move assignment operator */ NEElementwiseMax &operator=(NEElementwiseMax &&); /** Initialise the kernel's inputs, output and conversion policy. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |S32 |S32 |S32 | + * |S16 |S16 |S16 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | * * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. @@ -100,6 +113,19 @@ public: /** Default move assignment operator */ NEElementwiseMin &operator=(NEElementwiseMin &&); /** Initialise the kernel's inputs, output and conversion policy. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |S32 |S32 |S32 | + * |S16 |S16 |S16 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | * * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. @@ -147,6 +173,19 @@ public: /** Default move assignment operator */ NEElementwiseSquaredDiff &operator=(NEElementwiseSquaredDiff &&); /** Initialise the kernel's inputs, output and conversion policy. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |S32 |S32 |S32 | + * |S16 |S16 |S16 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | * * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. @@ -194,6 +233,15 @@ public: /** Default move assignment operator */ NEElementwiseDivision &operator=(NEElementwiseDivision &&); /** Initialise the kernel's inputs, output and conversion policy. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | * * @param[in, out] input1 First tensor input. Data types supported: F16/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. @@ -242,6 +290,15 @@ public: /** Default move assignment operator */ NEElementwisePower &operator=(NEElementwisePower &&); /** Initialise the kernel's inputs, output and conversion policy. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | * * @param[in, out] input1 First tensor input. Data types supported: F16/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. 
@@ -289,6 +346,20 @@ public: /** Default move assignment operator */ NEElementwiseComparison &operator=(NEElementwiseComparison &&); /** Initialise the kernel's inputs, output and conversion policy. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:-----| + * |QASYMM8 |QASYMM8 |U8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |U8 | + * |S32 |S32 |U8 | + * |U8 |U8 |U8 | + * |S16 |S16 |U8 | + * |F16 |F16 |U8 | + * |F32 |F32 |U8 | * * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h index 4786f71cf8..63e47b8377 100644 --- a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h +++ b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h @@ -53,6 +53,16 @@ public: NEElementwiseUnaryLayer &operator=(NEElementwiseUnaryLayer &&); /** Initialize the function + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | + * |S32 |S32 | * * @param[in] input Input tensor. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations. * @param[out] output Output tensor. Data types supported: Same as @p input. diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h index 8c3ba4f0c8..b2ffd038de 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h @@ -64,6 +64,18 @@ public: /** Destructor */ ~NEGEMMConv2d(); /** Set the input and output tensors. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 | * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h index c22ed1b5c4..fa5f5e3826 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h @@ -281,6 +281,16 @@ public: /** Default destructor */ ~NEGEMMLowpOutputStage(); /** Initialise the kernel's inputs, output + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:-------------|:-------------| + * |S32 |S32 |QASYMM8 | + * |S32 |S32 |QASYMM8_SIGNED| + * |S32 |S32 |QSYMM16 | * * @param[in] input Input tensor. Data type supported: S32 * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. 
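A hedged sketch of driving the S32 -> QASYMM8 row of the NEGEMMLowpOutputStage table just documented; the requantization multiplier, shift and offset below are placeholders rather than values derived from a real model:

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor acc, bias, dst;
        acc.allocator()->init(TensorInfo(TensorShape(32U, 8U), 1, DataType::S32));
        bias.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::S32)); // 1D bias [OFM]
        dst.allocator()->init(TensorInfo(TensorShape(32U, 8U), 1, DataType::QASYMM8, QuantizationInfo(0.05f, 3)));

        GEMMLowpOutputStageInfo info{};
        info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
        info.gemmlowp_multiplier = 1073741824; // placeholder fixed-point multiplier
        info.gemmlowp_shift      = 3;          // placeholder right-shift
        info.gemmlowp_offset     = 3;          // should match the output quantization offset
        info.gemmlowp_min_bound  = 0;
        info.gemmlowp_max_bound  = 255;
        info.output_data_type    = DataType::QASYMM8;

        NEGEMMLowpOutputStage stage;
        stage.configure(&acc, &bias, &dst, info);

        acc.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        stage.run();
        return 0;
    }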
diff --git a/arm_compute/runtime/NEON/functions/NELogical.h b/arm_compute/runtime/NEON/functions/NELogical.h index 04ffce6221..5cf5336f4f 100644 --- a/arm_compute/runtime/NEON/functions/NELogical.h +++ b/arm_compute/runtime/NEON/functions/NELogical.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -47,6 +47,14 @@ public: ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE_INC(NELogicalAnd) /** Initialise the kernel's inputs and output + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:-------------|:------------| + * |U8 |U8 |U8 | * * @param[in] input1 First tensor input. Data type supported: U8. * @param[in] input2 Second tensor input. Data type supported: U8. @@ -82,6 +90,14 @@ public: ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE_INC(NELogicalOr) /** Initialise the kernel's inputs and output + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:-------------|:------------| + * |U8 |U8 |U8 | * * @param[in] input1 First tensor input. Data type supported: U8. * @param[in] input2 Second tensor input. Data type supported: U8. @@ -117,6 +133,14 @@ public: ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE_INC(NELogicalNot) /** Initialise the kernel's inputs and output + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:-------------| + * |U8 |U8 | * * @param[in] input Input tensor. Data type supported: U8. * @param[out] output Output tensor. Data type supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h index 8a2ae10129..efe959f14e 100644 --- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h +++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h @@ -51,6 +51,17 @@ public: /** Default destructor */ ~NESoftmaxLayerGeneric(); /** Set the input and output tensors. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | * * @param[in,out] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. If the width is not a * multiple of the internal processing block size, @ref NEFillBorder replicates the diff --git a/arm_compute/runtime/OperatorList.h b/arm_compute/runtime/OperatorList.h index a659a79423..e1c2bed41d 100644 --- a/arm_compute/runtime/OperatorList.h +++ b/arm_compute/runtime/OperatorList.h @@ -51,7 +51,7 @@ * */ -/** ArithmeticAddition (no CL) +/** ArithmeticAddition * * Description: * Function to add 2 tensors. @@ -61,7 +61,7 @@ * */ -/** ArithmeticSubtraction (no CL) +/** ArithmeticSubtraction * * Description: * Function to substract 2 tensors. @@ -161,7 +161,7 @@ * */ -/** Comparison (only CL) +/** Comparison * * Description: * Function to compare 2 tensors. @@ -216,7 +216,7 @@ * */ -/** Crop (only CL) +/** Crop * * Description: * Performs a copy of input tensor to the output tensor. @@ -246,7 +246,7 @@ * */ -/** DeconvolutionLayerUpsample (only CL) +/** DeconvolutionLayerUpsample * * Description: * Function to execute deconvolution upsample on OpenCL. 
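The NELogical functions documented above all operate on U8 tensors interpreted as booleans; a minimal sketch (shape illustrative):

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // U8 inputs and output, as in the LogicalAnd table: non-zero values act as "true".
        Tensor a, b, out;
        const TensorInfo info(TensorShape(32U), 1, DataType::U8);
        a.allocator()->init(info);
        b.allocator()->init(info);
        out.allocator()->init(info);

        NELogicalAnd logical_and;
        logical_and.configure(&a, &b, &out);

        a.allocator()->allocate();
        b.allocator()->allocate();
        out.allocator()->allocate();

        logical_and.run();
        return 0;
    }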
@@ -296,7 +296,7 @@ * */ -/** DetectionPostProcessLayer (no CL) +/** DetectionPostProcessLayer * * Description: * Function to generate the detection output based on center size encoded boxes, class prediction and anchors by doing non maximum suppression (NMS). @@ -316,7 +316,7 @@ * */ -/** DirectDeconvolutionLayer (only CL) +/** DirectDeconvolutionLayer * * Description: * Function to run the deconvolution layer. @@ -326,7 +326,7 @@ * */ -/** ElementWiseOperations (skip) +/** ElementWiseOperations * * Description: * Function to perform in Cpu: @@ -336,7 +336,6 @@ * - Pow * - SquaredDiff * - Comparisons (Equal, greater, greater_equal, less, less_equal, not_equal) - * * Function to perform in CL: * - Add * - Sub @@ -351,18 +350,18 @@ * ANEURALNETWORKS_MINIMUM * ANEURALNETWORKS_POW * ANEURALNETWORKS_DIV - * ANEURALNETWORKS_ADD (only CL) - * ANEURALNETWORKS_SUB (only CL) - * ANEURALNETWORKS_EQUAL (no CL) - * ANEURALNETWORKS_GREATER (no CL) - * ANEURALNETWORKS_GREATER_EQUAL (no CL) - * ANEURALNETWORKS_LESS (no CL) - * ANEURALNETWORKS_LESS_EQUAL (no CL) - * ANEURALNETWORKS_NOT_EQUAL (no CL) + * ANEURALNETWORKS_ADD + * ANEURALNETWORKS_SUB + * ANEURALNETWORKS_EQUAL + * ANEURALNETWORKS_GREATER + * ANEURALNETWORKS_GREATER_EQUAL + * ANEURALNETWORKS_LESS + * ANEURALNETWORKS_LESS_EQUAL + * ANEURALNETWORKS_NOT_EQUAL * */ -/** ElementWiseOperationUnary (skip) +/** ElementwiseUnaryLayer * * Description: * Function to perform: @@ -494,7 +493,7 @@ * */ -/** GEMMConv2D (no CL) +/** GEMMConv2D * * Description: * General Matrix Multiplication. @@ -514,7 +513,7 @@ * */ -/** GEMMDeconvolutionLayer (only CL) +/** GEMMDeconvolutionLayer * * Description: * General Matrix Multiplication. @@ -574,7 +573,7 @@ * */ -/** Logical (no CL) +/** Logical * * Description: * Function to perform: @@ -587,7 +586,7 @@ * */ -/** LogicalAnd (only CL) +/** LogicalAnd * * Description: * Function to perform Logical AND. @@ -597,7 +596,7 @@ * */ -/** LogicalOr (only CL) +/** LogicalOr * * Description: * Function to perform Logical OR. @@ -607,7 +606,7 @@ * */ -/** LogicalNot (only CL) +/** LogicalNot * * Description: * Function to perform Logical NOT. @@ -724,7 +723,7 @@ /** PriorBoxLayer * * Description: - * Function to . + * Function to compute prior boxes and clip. * * Equivalent Android NNAPI Op: * n/a @@ -889,7 +888,7 @@ * */ -/** SoftmaxLayer (skip) +/** SoftmaxLayer * * Description: * Function to compute a SoftmaxLayer and a Log SoftmaxLayer. @@ -990,7 +989,7 @@ * */ -/** WinogradInputTransform (only CL) +/** WinogradInputTransform * * Description: * Function to. diff --git a/docs/09_operators_list.dox b/docs/09_operators_list.dox index 244f292f82..fc41265738 100644 --- a/docs/09_operators_list.dox +++ b/docs/09_operators_list.dox @@ -144,6 +144,62 @@ where N = batches, C = channels, H = height, W = width F16U32, S32 F32U32, S32 + + ArithmeticAddition + Function to add 2 tensors. + +
    +
    Equivalent Android NNAPI Op:
      • ANEURALNETWORKS_ADD
    NEArithmeticAddition
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0           |src1           |dst            |
        |QASYMM8        |QASYMM8        |QASYMM8        |
        |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
        |QSYMM16        |QSYMM16        |QSYMM16        |
        |QSYMM16        |QSYMM16        |S32            |
        |U8             |U8             |U8             |
        |U8             |U8             |S16            |
        |U8             |S16            |S16            |
        |S16            |U8             |S16            |
        |S16            |S16            |S16            |
        |S32            |S32            |S32            |
        |F16            |F16            |F16            |
        |F32            |F32            |F32            |
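One of the mixed-type rows above (U8 + U8 -> S16) as a sketch; widening the destination means the sum cannot exceed the S16 range, so either convert policy is safe here:

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor a, b, sum;
        a.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::U8));
        b.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::U8));
        sum.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::S16)); // widened output

        NEArithmeticAddition add;
        add.configure(&a, &b, &sum, ConvertPolicy::WRAP);

        a.allocator()->allocate();
        b.allocator()->allocate();
        sum.allocator()->allocate();

        add.run();
        return 0;
    }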
ArithmeticSubtraction
    Function to subtract 2 tensors.
    Equivalent Android NNAPI Op:
      • ANEURALNETWORKS_SUB
    NEArithmeticSubtraction
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0           |src1           |dst            |
        |QASYMM8        |QASYMM8        |QASYMM8        |
        |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
        |QSYMM16        |QSYMM16        |QSYMM16        |
        |QSYMM16        |QSYMM16        |S32            |
        |U8             |U8             |U8             |
        |U8             |U8             |S16            |
        |U8             |S16            |S16            |
        |S16            |U8             |S16            |
        |S16            |S16            |S16            |
        |S32            |S32            |S32            |
        |F16            |F16            |F16            |
        |F32            |F32            |F32            |
BatchNormalizationLayer
    Function to perform batch normalization.
@@ -421,6 +477,28 @@ where N = batches, C = channels, H = height, W = width
        |src |dst |
        |All |All |
Comparison
    Function to compare 2 tensors.
    Equivalent Android NNAPI Op:
      • ANEURALNETWORKS_EQUAL
      • ANEURALNETWORKS_GREATER
      • ANEURALNETWORKS_GREATER_EQUAL
      • ANEURALNETWORKS_LESS
      • ANEURALNETWORKS_LESS_EQUAL
      • ANEURALNETWORKS_NOT_EQUAL
    CLComparison
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0 |src1 |dst |
        |All  |All  |U8  |
ConcatenateLayer
    Function to concatenate tensors along a given axis.
@@ -553,6 +631,23 @@ where N = batches, C = channels, H = height, W = width
        |src |dst |
        |All |All |
Crop
    Performs a copy of input tensor to the output tensor.
    Equivalent Android NNAPI Op:
      • n/a
    CLCrop
      Valid data layouts:
        • NHWC
      Valid data type configurations:
        |src |dst |
        |All |F32 |
CropResize
    Function to perform cropping and resizing.
@@ -621,6 +716,24 @@ where N = batches, C = channels, H = height, W = width
        |QASYMM8_SIGNED |QASYMM8_SIGNED     |S32 |QASYMM8_SIGNED |
        |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
DeconvolutionLayerUpsample
    Function to execute deconvolution upsample on OpenCL.
    Equivalent Android NNAPI Op:
      • ANEURALNETWORKS_TRANSPOSE_CONV_2D
    CLDeconvolutionLayerUpsample
      Valid data layouts:
        • NHWC
        • NCHW
      Valid data type configurations:
        |src |dst |
        |All |All |
DepthConvertLayer
    Performs a down-scaling depth conversion.
@@ -706,39 +819,420 @@ where N = batches, C = channels, H = height, W = width
    NEDepthwiseConvolutionLayer
      Valid data type configurations:
        |src0           |src1               |src2 |dst            |
        |F16            |F16                |F16  |F16            |
        |F32            |F32                |F32  |F32            |
        |QASYMM8        |QASYMM8            |S32  |QASYMM8        |
        |QASYMM8        |QSYMM8_PER_CHANNEL |S32  |QASYMM8        |
        |QASYMM8_SIGNED |QASYMM8_SIGNED     |S32  |QASYMM8_SIGNED |
        |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32  |QASYMM8_SIGNED |
    CLDepthwiseConvolutionLayer
      Valid data layouts:
        • NHWC
        • NCHW
      Valid data type configurations:
        |src0           |src1               |src2 |dst            |
        |F16            |F16                |F16  |F16            |
        |F32            |F32                |F32  |F32            |
        |QASYMM8        |QASYMM8            |S32  |QASYMM8        |
        |QASYMM8        |QSYMM8_PER_CHANNEL |S32  |QASYMM8        |
        |QASYMM8_SIGNED |QASYMM8_SIGNED     |S32  |QASYMM8_SIGNED |
        |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32  |QASYMM8_SIGNED |
DequantizationLayer
    Function to dequantize the values in a tensor.
    Equivalent Android NNAPI Op:
      • ANEURALNETWORKS_DEQUANTIZE
    NEDequantizationLayer
      Valid data layouts:
        • All
      Valid data type configurations:
        |src                |dst      |
        |QASYMM8            |F16, F32 |
        |QASYMM8_SIGNED     |F16, F32 |
        |QSYMM8_PER_CHANNEL |F16, F32 |
        |QSYMM8             |F16, F32 |
        |QSYMM16            |F16, F32 |
    CLDequantizationLayer
      Valid data layouts:
        • All
      Valid data type configurations:
        |src                |dst      |
        |QASYMM8            |F16, F32 |
        |QASYMM8_SIGNED     |F16, F32 |
        |QSYMM8_PER_CHANNEL |F16, F32 |
        |QSYMM8             |F16, F32 |
        |QSYMM16            |F16, F32 |
DetectionPostProcessLayer
    Function to generate the detection output based on center size encoded boxes, class prediction and anchors by doing non maximum suppression (NMS).
    Equivalent Android NNAPI Op:
      • ANEURALNETWORKS_DETECTION_POSTPROCESSING
    NEDetectionPostProcessLayer
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0 - src2    |dst0 - dst3 |
        |QASYMM8        |F32         |
        |QASYMM8_SIGNED |F32         |
        |F32            |F32         |
DirectConvolutionLayer
    Function to compute direct convolution.
    Equivalent Android NNAPI Op:
      • ANEURALNETWORKS_CONV_2D
    NEDirectConvolutionLayer
      Valid data layouts:
        • NHWC
        • NCHW
      Valid data type configurations:
        |src0 |src1 |src2 |dst |
        |F16  |F16  |F16  |F16 |
        |F32  |F32  |F32  |F32 |
    CLDirectConvolutionLayer
      Valid data layouts:
        • NHWC
        • NCHW
      Valid data type configurations:
        |src0           |src1           |src2 |dst            |
        |F16            |F16            |F16  |F16            |
        |F32            |F32            |F32  |F32            |
        |QASYMM8        |QASYMM8        |S32  |QASYMM8        |
        |QASYMM8_SIGNED |QASYMM8_SIGNED |S32  |QASYMM8_SIGNED |
DirectDeconvolutionLayer
    Function to run the deconvolution layer.
    Equivalent Android NNAPI Op:
      • ANEURALNETWORKS_TRANSPOSE_CONV_2D
    CLDirectDeconvolutionLayer
      Valid data layouts:
        • NHWC
        • NCHW
      Valid data type configurations:
        |src0           |src1               |src2 |dst            |
        |F16            |F16                |F16  |F16            |
        |F32            |F32                |F32  |F32            |
        |QASYMM8        |QASYMM8            |S32  |QASYMM8        |
        |QASYMM8_SIGNED |QASYMM8_SIGNED     |S32  |QASYMM8_SIGNED |
        |QASYMM8        |QSYMM8_PER_CHANNEL |S32  |QASYMM8        |
        |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32  |QASYMM8_SIGNED |
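A sketch of the F32 row of the CLDirectDeconvolutionLayer table above. The shapes assume NCHW, a 3x3 kernel, stride 1 and no padding, so the output side is (8 - 1) * 1 + 3 = 10 under the usual deconvolution output formula; all values are illustrative:

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLFunctions.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        CLTensor src, weights, bias, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 2U), 1, DataType::F32));         // W x H x IFM
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32)); // kW x kH x IFM x OFM
        bias.allocator()->init(TensorInfo(TensorShape(4U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(10U, 10U, 4U), 1, DataType::F32));

        CLDirectDeconvolutionLayer deconv;
        deconv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 0, 0));

        src.allocator()->allocate();
        weights.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        deconv.run();
        CLScheduler::get().sync();
        return 0;
    }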
ElementWiseOperations
    Function to perform elementwise operations. On the CPU backend: Div, Max, Min, Pow, SquaredDiff and comparisons (Equal, Greater, GreaterEqual, Less, LessEqual, NotEqual). On the OpenCL backend: Add, Sub, Div, Max, Min, Pow and SquaredDiff.
    Equivalent Android NNAPI Op:
      • ANEURALNETWORKS_MAXIMUM
      • ANEURALNETWORKS_MINIMUM
      • ANEURALNETWORKS_POW
      • ANEURALNETWORKS_DIV
      • ANEURALNETWORKS_ADD
      • ANEURALNETWORKS_SUB
      • ANEURALNETWORKS_EQUAL
      • ANEURALNETWORKS_GREATER
      • ANEURALNETWORKS_GREATER_EQUAL
      • ANEURALNETWORKS_LESS
      • ANEURALNETWORKS_LESS_EQUAL
      • ANEURALNETWORKS_NOT_EQUAL
    NEElementwiseMax
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0           |src1           |dst            |
        |QASYMM8        |QASYMM8        |QASYMM8        |
        |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
        |S32            |S32            |S32            |
        |S16            |S16            |S16            |
        |F16            |F16            |F16            |
        |F32            |F32            |F32            |
    NEElementwiseMin
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0           |src1           |dst            |
        |QASYMM8        |QASYMM8        |QASYMM8        |
        |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
        |S32            |S32            |S32            |
        |S16            |S16            |S16            |
        |F16            |F16            |F16            |
        |F32            |F32            |F32            |
    NEElementwiseSquaredDiff
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0           |src1           |dst            |
        |QASYMM8        |QASYMM8        |QASYMM8        |
        |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
        |S32            |S32            |S32            |
        |S16            |S16            |S16            |
        |F16            |F16            |F16            |
        |F32            |F32            |F32            |
    NEElementwiseDivision
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0 |src1 |dst |
        |F16  |F16  |F16 |
        |F32  |F32  |F32 |
    NEElementwisePower
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0 |src1 |dst |
        |F16  |F16  |F16 |
        |F32  |F32  |F32 |
    NEElementwiseComparison
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0           |src1           |dst |
        |QASYMM8        |QASYMM8        |U8  |
        |QASYMM8_SIGNED |QASYMM8_SIGNED |U8  |
        |S32            |S32            |U8  |
        |U8             |U8             |U8  |
        |S16            |S16            |U8  |
        |F16            |F16            |U8  |
        |F32            |F32            |U8  |
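A sketch of NEElementwiseMax on the F32 configuration. The second input's shape relies on broadcasting of dimension 0, which the elementwise configure() documentation describes for the CL variants and is assumed to hold on the CPU side as well:

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor a, b, out;
        a.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::F32));
        b.allocator()->init(TensorInfo(TensorShape(1U, 4U), 1, DataType::F32)); // broadcast along dim 0
        out.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::F32));

        NEElementwiseMax emax;
        emax.configure(&a, &b, &out);

        a.allocator()->allocate();
        b.allocator()->allocate();
        out.allocator()->allocate();

        emax.run();
        return 0;
    }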
    CLArithmeticAddition
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0           |src1           |dst            |
        |QASYMM8        |QASYMM8        |QASYMM8        |
        |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
        |QSYMM16        |QSYMM16        |QSYMM16        |
        |U8             |U8             |U8             |
        |U8             |U8             |S16            |
        |U8             |S16            |S16            |
        |S16            |U8             |S16            |
        |S16            |S16            |S16            |
        |S32            |S32            |S32            |
        |F16            |F16            |F16            |
        |F32            |F32            |F32            |
    CLArithmeticSubtraction
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0           |src1           |dst            |
        |QASYMM8        |QASYMM8        |QASYMM8        |
        |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
        |QSYMM16        |QSYMM16        |QSYMM16        |
        |U8             |U8             |U8             |
        |U8             |U8             |S16            |
        |U8             |S16            |S16            |
        |S16            |U8             |S16            |
        |S16            |S16            |S16            |
        |S32            |S32            |S32            |
        |F16            |F16            |F16            |
        |F32            |F32            |F32            |
    CLArithmeticDivision
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0 |src1 |dst |
        |F16  |F16  |F16 |
        |F32  |F32  |F32 |
    CLElementwiseMax
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0           |src1           |dst            |
        |QASYMM8        |QASYMM8        |QASYMM8        |
        |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
        |QSYMM16        |QSYMM16        |QSYMM16        |
        |U8             |U8             |U8             |
        |S16            |S16            |S16            |
        |S32            |S32            |S32            |
        |U32            |U32            |U32            |
        |F16            |F16            |F16            |
        |F32            |F32            |F32            |
    CLElementwiseMin
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0           |src1           |dst            |
        |QASYMM8        |QASYMM8        |QASYMM8        |
        |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
        |QSYMM16        |QSYMM16        |QSYMM16        |
        |U8             |U8             |U8             |
        |S16            |S16            |S16            |
        |S32            |S32            |S32            |
        |U32            |U32            |U32            |
        |F16            |F16            |F16            |
        |F32            |F32            |F32            |
    CLElementwiseSquaredDiff
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0           |src1           |dst            |
        |QASYMM8        |QASYMM8        |QASYMM8        |
        |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
        |QSYMM16        |QSYMM16        |QSYMM16        |
        |U8             |U8             |U8             |
        |S16            |S16            |S16            |
        |F16            |F16            |F16            |
        |F32            |F32            |F32            |
    CLElementwisePower
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0 |src1 |dst |
        |F16  |F16  |F16 |
        |F32  |F32  |F32 |
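The CL elementwise functions also accept an optional fused ActivationLayerInfo; a sketch of CLArithmeticDivision on F32 with a fused ReLU (the fusion choice and shapes are illustrative):

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLFunctions.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        CLTensor num, den, quot;
        num.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
        den.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
        quot.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));

        CLArithmeticDivision div;
        // The trailing ActivationLayerInfo is optional; here a ReLU is fused into the kernel.
        div.configure(&num, &den, &quot, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

        num.allocator()->allocate();
        den.allocator()->allocate();
        quot.allocator()->allocate();

        div.run();
        CLScheduler::get().sync();
        return 0;
    }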
ElementwiseUnaryLayer
    Function to perform: Rsqrt, Exp, Neg, Log, Abs, Round, Sin.
    Equivalent Android NNAPI Op:
      • ANEURALNETWORKS_ABS
      • ANEURALNETWORKS_EXP
      • ANEURALNETWORKS_LOG
      • ANEURALNETWORKS_NEG
      • ANEURALNETWORKS_RSQRT
      • ANEURALNETWORKS_SIN
    NEElementwiseUnaryLayer
      Valid data layouts:
        • All
      Valid data type configurations:
        |src |dst |
        |F16 |F16 |
        |F32 |F32 |
        |S32 |S32 |
    CLRsqrtLayer
      Valid data layouts:
        • All
      Valid data type configurations:
        |src |dst |
        |F16 |F16 |
        |F32 |F32 |
    CLExpLayer
      Valid data layouts:
        • All
      Valid data type configurations:
        |src |dst |
        |F16 |F16 |
        |F32 |F32 |
    CLNegLayer
      Valid data layouts:
        • All
      Valid data type configurations:
        |src |dst |
        |F16 |F16 |
        |F32 |F32 |
    CLSinLayer
      Valid data layouts:
        • All
      Valid data type configurations:
        |src |dst |
        |F16 |F16 |
        |F32 |F32 |
    CLLogLayer
      Valid data layouts:
        • All
      Valid data type configurations:
        |src |dst |
        |F16 |F16 |
        |F32 |F32 |
    CLAbsLayer
      Valid data layouts:
        • All
      Valid data type configurations:
        |src |dst |
        |F16 |F16 |
        |F32 |F32 |
    CLRoundLayer
      Valid data layouts:
        • All
      Valid data type configurations:
        |src |dst |
        |F16 |F16 |
        |F32 |F32 |
(This hunk also removes the stale CLDepthwiseConvolutionLayer, DequantizationLayer and DirectConvolutionLayer rows that previously sat at this position; those operators are now listed in alphabetical order above.)
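Each of the CL unary layers above shares the same single-input configure(); a sketch with CLExpLayer on F32:

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLFunctions.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32)); // same type as the input

        CLExpLayer exp_layer;
        exp_layer.configure(&src, &dst);

        src.allocator()->allocate();
        dst.allocator()->allocate();

        exp_layer.run();
        CLScheduler::get().sync();
        return 0;
    }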
FFT1D
@@ -1009,7 +1499,7 @@ where N = batches, C = channels, H = height, W = width
      • ANEURALNETWORKS_FULLY_CONNECTED
-    NEFullyConnectedLayerReshapeWeightsManaged
+    NEFullyConnectedLayer
      Valid data layouts:
        • NHWC
        • NCHW
      Valid data type configurations:
        |src0           |src1           |src2 |dst            |
        |F16            |F16            |F16  |F16            |
        |F32            |F32            |F32  |F32            |
        |QASYMM8        |QASYMM8        |S32  |QASYMM8        |
        |QASYMM8_SIGNED |QASYMM8_SIGNED |S32  |QASYMM8_SIGNED |
-    CLFullyConnectedLayerReshapeWeightsManaged
+    CLFullyConnectedLayer
      Valid data layouts:
        • NHWC
        • NCHW
      Valid data type configurations:
        |src0           |src1           |src2 |dst            |
        |F16            |F16            |F16  |F16            |
        |F32            |F32            |F32  |F32            |
        |QASYMM8        |QASYMM8        |S32  |QASYMM8        |
        |QASYMM8_SIGNED |QASYMM8_SIGNED |S32  |QASYMM8_SIGNED |
@@ -1118,7 +1608,7 @@ where N = batches, C = channels, H = height, W = width
        |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 |
-    CLGEMMReshapeRHSMatrixKernelManaged
+    CLGEMM
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0 |src1 |src2 |dst |
        |F32  |F32  |F32  |F32 |
        |F16  |F16  |F16  |F16 |
GEMMConv2D
    General Matrix Multiplication.
              +
    Equivalent Android NNAPI Op:
      • ANEURALNETWORKS_CONV_2D
    NEGEMMConv2d
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0           |src1           |src2     |dst            |
        |QASYMM8        |QASYMM8        |S32      |QASYMM8        |
        |QASYMM8_SIGNED |QASYMM8_SIGNED |S32      |QASYMM8_SIGNED |
        |F16            |F16            |F16      |F16            |
        |F32            |F32            |F32      |F32            |
        |BFLOAT16       |BFLOAT16       |BFLOAT16 |BFLOAT16       |
GEMMConvolutionLayer
    General Matrix Multiplication.
    Equivalent Android NNAPI Op:
      • ANEURALNETWORKS_CONV_2D
-    NEConvolutionLayerReshapeWeights
+    NEGEMMConvolutionLayer
      Valid data layouts:
        • NHWC
        • NCHW
      Valid data type configurations:
        |src0           |src1               |src2     |dst            |
        |F16            |F16                |F16      |F16            |
        |F32            |F32                |F32      |F32            |
        |BFLOAT16       |BFLOAT16           |BFLOAT16 |BFLOAT16       |
        |QASYMM8        |QASYMM8            |S32      |QASYMM8        |
        |QASYMM8        |QSYMM8_PER_CHANNEL |S32      |QASYMM8        |
        |QASYMM8_SIGNED |QASYMM8_SIGNED     |S32      |QASYMM8_SIGNED |
        |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32      |QASYMM8_SIGNED |
-    CLConvolutionLayerReshapeWeights
+    CLGEMMConvolutionLayer
      Valid data layouts:
        • NHWC
        • NCHW
      Valid data type configurations:
        |src0           |src1               |src2 |dst            |
        |F16            |F16                |F16  |F16            |
        |F32            |F32                |F32  |F32            |
        |QASYMM8        |QASYMM8            |S32  |QASYMM8        |
        |QASYMM8        |QSYMM8_PER_CHANNEL |S32  |QASYMM8        |
        |QASYMM8_SIGNED |QASYMM8_SIGNED     |S32  |QASYMM8_SIGNED |
        |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32  |QASYMM8_SIGNED |
GEMMDeconvolutionLayer
    General Matrix Multiplication.
    Equivalent Android NNAPI Op:
      • ANEURALNETWORKS_TRANSPOSE_CONV_2D
    CLGEMMDeconvolutionLayer
      Valid data layouts:
        • NHWC
      Valid data type configurations:
        |src0           |src1           |src2 |dst            |
        |F16            |F16            |F16  |F16            |
        |F32            |F32            |F32  |F32            |
        |QASYMM8        |QASYMM8        |S32  |QASYMM8        |
        |QASYMM8_SIGNED |QASYMM8_SIGNED |S32  |QASYMM8_SIGNED |
GEMMLowpMatrixMultiplyCore
    General Matrix Multiplication.
@@ -1222,6 +1753,38 @@ where N = batches, C = channels, H = height, W = width
        |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |S32 |
        |QASYMM8_SIGNED |QSYMM8             |S32 |S32 |
GEMMLowpOutputStage
    General Matrix Multiplication.
    Equivalent Android NNAPI Op:
                  +
      • n/a
    NEGEMMLowpOutputStage
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0 |src1 |dst            |
        |S32  |S32  |QASYMM8        |
        |S32  |S32  |QASYMM8_SIGNED |
        |S32  |S32  |QSYMM16        |
    CLGEMMLowpOutputStage
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0 |src1 |dst            |
        |S32  |S32  |QASYMM8        |
        |S32  |S32  |QASYMM8_SIGNED |
        |S32  |S32  |QSYMM16        |
GenerateProposalsLayer
    Function to generate proposals for a RPN (Region Proposal Network).
@@ -1318,6 +1881,96 @@ where N = batches, C = channels, H = height, W = width
        |F16 |F16 |
        |F32 |F32 |
Logical
    Function to perform: Logical AND, Logical OR, Logical NOT.
    Equivalent Android NNAPI Op:
                  +
      • n/a
    NELogicalAnd
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0 |src1 |dst |
        |U8   |U8   |U8  |
    NELogicalOr
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0 |src1 |dst |
        |U8   |U8   |U8  |
    NELogicalNot
      Valid data layouts:
        • All
      Valid data type configurations:
        |src |dst |
        |U8  |U8  |
LogicalAnd
    Function to perform Logical AND.
    Equivalent Android NNAPI Op:
      • n/a
    CLLogicalAnd
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0 |src1 |dst |
        |U8   |U8   |U8  |
LogicalOr
    Function to perform Logical OR.
    Equivalent Android NNAPI Op:
      • n/a
    CLLogicalOr
      Valid data layouts:
        • All
      Valid data type configurations:
        |src0 |src1 |dst |
        |U8   |U8   |U8  |
LogicalNot
    Function to perform Logical NOT.
    Equivalent Android NNAPI Op:
      • n/a
    CLLogicalNot
      Valid data layouts:
        • All
      Valid data type configurations:
        |src |dst |
        |U8  |U8  |
LSTMLayer
    Function to perform a single time step in a Long Short-Term Memory (LSTM) layer.
@@ -1660,7 +2313,7 @@ where N = batches, C = channels, H = height, W = width
PriorBoxLayer
-    Function to .
+    Function to compute prior boxes and clip.
    Equivalent Android NNAPI Op:
      • n/a
@@ -2150,6 +2803,41 @@ where N = batches, C = channels, H = height, W = width
        |src |dst |
        |All |All |
SoftmaxLayer
    Function to compute a SoftmaxLayer and a Log SoftmaxLayer.
    Equivalent Android NNAPI Op:
                    +
      • ANEURALNETWORKS_LOG_SOFTMAX
      • ANEURALNETWORKS_SOFTMAX
    NESoftmaxLayerGeneric
      Valid data layouts:
        • All
      Valid data type configurations:
        |src            |dst            |
        |QASYMM8        |QASYMM8        |
        |QASYMM8_SIGNED |QASYMM8_SIGNED |
        |F16            |F16            |
        |F32            |F32            |
    CLSoftmaxLayerGeneric
      Valid data layouts:
        • All
      Valid data type configurations:
        |src            |dst            |
        |QASYMM8        |QASYMM8        |
        |QASYMM8_SIGNED |QASYMM8_SIGNED |
        |F16            |F16            |
        |F32            |F32            |
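A sketch of the QASYMM8 softmax row above; the input quantisation is illustrative, while the output quantisation of (1/256, 0) is what the quantized softmax conventionally produces:

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor logits, probs;
        logits.allocator()->init(TensorInfo(TensorShape(10U, 2U), 1, DataType::QASYMM8, QuantizationInfo(0.1f, 5)));
        probs.allocator()->init(TensorInfo(TensorShape(10U, 2U), 1, DataType::QASYMM8, QuantizationInfo(1.0f / 256, 0)));

        NESoftmaxLayer softmax;
        softmax.configure(&logits, &probs); // beta = 1.f, axis = 0 by default

        logits.allocator()->allocate();
        probs.allocator()->allocate();

        softmax.run();
        return 0;
    }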
SpaceToBatchLayer
    Function to divide a tensor spatially.
@@ -2410,6 +3098,25 @@ where N = batches, C = channels, H = height, W = width
        |F16 |F16 |F16 |F16 |
        |F32 |F32 |F32 |F32 |
WinogradInputTransform
    Function to perform the Winograd input transform.
    Equivalent Android NNAPI Op:
                    +
      • n/a
    CLWinogradInputTransform
      Valid data layouts:
        • NHWC
        • NCHW
      Valid data type configurations:
        |src |dst |
        |F16 |F16 |
        |F32 |F32 |
*/
--
cgit v1.2.1