author    Sheri Zhang <sheri.zhang@arm.com> 2021-05-04 14:03:13 +0100
committer Sheri Zhang <sheri.zhang@arm.com> 2021-05-05 09:38:50 +0000
commit    6124ce60b54eb5639ed19d46c79fce21cca2c83b (patch)
tree      9754cfdd6f1fed0eaa6866e04c1d4e4a57004810
parent    0ec58215bf5322d370dbc7c0a7f3ced05af2174f (diff)
Update operator list part3
Partially resolve: COMPMID-4199

Signed-off-by: Sheri Zhang <sheri.zhang@arm.com>
Change-Id: Id24702d258fb4e04ad948e7cf6c0efd98d2a5456
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5561
Reviewed-by: TeresaARM <teresa.charlinreyes@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--arm_compute/runtime/CL/functions/CLComparison.h10
-rw-r--r--arm_compute/runtime/CL/functions/CLCrop.h8
-rw-r--r--arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h9
-rw-r--r--arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h14
-rw-r--r--arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h63
-rw-r--r--arm_compute/runtime/CL/functions/CLElementwiseOperations.h124
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h13
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h12
-rw-r--r--arm_compute/runtime/CL/functions/CLLogicalAnd.h8
-rw-r--r--arm_compute/runtime/CL/functions/CLLogicalNot.h8
-rw-r--r--arm_compute/runtime/CL/functions/CLLogicalOr.h8
-rw-r--r--arm_compute/runtime/CL/functions/CLSoftmaxLayer.h11
-rw-r--r--arm_compute/runtime/CL/functions/CLWinogradInputTransform.h12
-rw-r--r--arm_compute/runtime/NEON/functions/NEArithmeticAddition.h3
-rw-r--r--arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h19
-rw-r--r--arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h12
-rw-r--r--arm_compute/runtime/NEON/functions/NEElementwiseOperations.h71
-rw-r--r--arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h10
-rw-r--r--arm_compute/runtime/NEON/functions/NEGEMMConv2d.h12
-rw-r--r--arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h10
-rw-r--r--arm_compute/runtime/NEON/functions/NELogical.h26
-rw-r--r--arm_compute/runtime/NEON/functions/NESoftmaxLayer.h11
-rw-r--r--arm_compute/runtime/OperatorList.h53
-rw-r--r--docs/09_operators_list.dox719
24 files changed, 1179 insertions(+), 67 deletions(-)
diff --git a/arm_compute/runtime/CL/functions/CLComparison.h b/arm_compute/runtime/CL/functions/CLComparison.h
index 8cc3e96ec5..3f984900ee 100644
--- a/arm_compute/runtime/CL/functions/CLComparison.h
+++ b/arm_compute/runtime/CL/functions/CLComparison.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,6 +40,14 @@ class CLComparison : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs and outputs.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------|:--------|:--------|
+ * |All |All |U8 |
+ *
* @param[in] input1 Source tensor. Data types supported: All.
* The input1 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
* @param[in] input2 Source tensor. Data types supported: Same as @p input1.
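A minimal usage sketch for the CLComparison configuration documented above; the default OpenCL setup, the F32 shapes, and the Greater operation are illustrative assumptions, not part of this patch:

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLComparison.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init(); // create a default OpenCL context and queue

        CLTensor in0, in1, out;
        in0.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::F32)); // illustrative shape
        in1.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::F32));
        out.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::U8)); // dst is always U8

        CLComparison cmp;
        cmp.configure(&in0, &in1, &out, ComparisonOperation::Greater);

        in0.allocator()->allocate();
        in1.allocator()->allocate();
        out.allocator()->allocate();

        cmp.run();                 // enqueue the kernel
        CLScheduler::get().sync(); // wait for the result
        return 0;
    }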
diff --git a/arm_compute/runtime/CL/functions/CLCrop.h b/arm_compute/runtime/CL/functions/CLCrop.h
index dc509b5b84..d2b72a5eff 100644
--- a/arm_compute/runtime/CL/functions/CLCrop.h
+++ b/arm_compute/runtime/CL/functions/CLCrop.h
@@ -55,6 +55,14 @@ public:
*
* @note Supported tensor rank: up to 4
*
+ * Valid data layouts:
+ * - NHWC
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |F32 |
+ *
* @param[in] input Source tensor. Data type supported: All. Data layouts supported: NHWC.
* @param[out] output Destination tensor. Data type supported: F32
* @param[in] start Coordinates of where to start cropping the image.
diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
index 6c1302fbf7..344ebd0afb 100644
--- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
+++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
@@ -61,6 +61,15 @@ public:
/** Initialize the function's source, destination, interpolation type and border_mode.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in, out] input Source tensor. Data type supported: All.
* @param[out] output Destination tensor. Data type supported: same as @p input.
* @param[in] info Contains padding and policies to be used in the deconvolution.
diff --git a/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h
index a23500e16b..567de13508 100644
--- a/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h
@@ -87,6 +87,20 @@ public:
CLDirectDeconvolutionLayer &operator=(CLDirectDeconvolutionLayer &&) = default;
/** Set the input, weights, biases and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ *
* @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.
* Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
* @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input or QSYMM8_PER_CHANNEL if @p input is QASYMM8/QASYMM8_SIGNED.
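As a sketch of how the table above maps to a call, assuming F32 NCHW tensors of illustrative sizes (3x3 kernels, stride 2, no padding, so an 8x8 input upsamples to (8-1)*2+3 = 17):

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        CLTensor src, weights, bias, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 3U), 1, DataType::F32));          // W, H, IFM
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 16U), 1, DataType::F32)); // [width, height, IFM, OFM]
        bias.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(17U, 17U, 16U), 1, DataType::F32));       // upsampled output

        CLDirectDeconvolutionLayer deconv;
        deconv.configure(&src, &weights, &bias, &dst, PadStrideInfo(2, 2, 0, 0));

        src.allocator()->allocate();
        weights.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        deconv.run();
        CLScheduler::get().sync();
        return 0;
    }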
diff --git a/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h b/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h
index fd6942cad5..79b79e89de 100644
--- a/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h
+++ b/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h
@@ -54,6 +54,15 @@ public:
CLRsqrtLayer &operator=(CLRsqrtLayer &&);
/** Initialize the function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in] input Input tensor. Data types supported: F16/F32.
* @param[out] output Output tensor. Data types supported: same as @p input.
*/
@@ -100,6 +109,15 @@ public:
CLExpLayer &operator=(CLExpLayer &&);
/** Initialize the function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in] input Input tensor. Data types supported: F16/F32.
* @param[out] output Output tensor. Data types supported: same as @p input.
*/
@@ -146,6 +164,15 @@ public:
CLNegLayer &operator=(CLNegLayer &&);
/** Initialize the function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in] input Input tensor. Data types supported: F16/F32.
* @param[out] output Output tensor. Data types supported: same as @p input.
*/
@@ -192,6 +219,15 @@ public:
CLSinLayer &operator=(CLSinLayer &&);
/** Initialize the function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in] input Input tensor. Data types supported: F16/F32.
* @param[out] output Output tensor. Data types supported: same as @p input.
*/
@@ -238,6 +274,15 @@ public:
CLLogLayer &operator=(CLLogLayer &&);
/** Initialize the function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in] input Input tensor. Data types supported: F16/F32.
* @param[out] output Output tensor. Data types supported: same as @p input.
*/
@@ -284,6 +329,15 @@ public:
CLAbsLayer &operator=(CLAbsLayer &&);
/** Initialize the function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in] input Input tensor. Data types supported: F16/F32.
* @param[out] output Output tensor. Data types supported: same as @p input.
*/
@@ -330,6 +384,15 @@ public:
CLRoundLayer &operator=(CLRoundLayer &&);
/** Initialize the function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in] input Input tensor. Data types supported: F16/F32.
* @param[out] output Output tensor. Data types supported: same as @p input.
*/
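All seven unary layers in this header share the same two-tensor configure pattern; a minimal sketch with CLRsqrtLayer and an illustrative F16 shape (the other layers are drop-in substitutions):

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F16)); // illustrative 1D shape
        dst.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F16)); // dst type matches src

        CLRsqrtLayer rsqrt; // CLExpLayer, CLNegLayer, CLSinLayer, CLLogLayer, CLAbsLayer, CLRoundLayer configure identically
        rsqrt.configure(&src, &dst);

        src.allocator()->allocate();
        dst.allocator()->allocate();

        rsqrt.run();
        CLScheduler::get().sync();
        return 0;
    }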
diff --git a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
index 2b291517f3..555e84a251 100644
--- a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
+++ b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
@@ -55,19 +55,23 @@ public:
CLArithmeticAddition &operator=(CLArithmeticAddition &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
- * Valid configurations (Input1,Input2) -> Output :
- *
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (S16,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,S16) -> S16
- * - (S32,S32) -> S32
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- * - (QASYMM8,QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16,QSYMM16) -> QSYMM16
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |QSYMM16 |
+ * |U8 |U8 |U8 |
+ * |U8 |U8 |S16 |
+ * |U8 |S16 |S16 |
+ * |S16 |U8 |S16 |
+ * |S16 |S16 |S16 |
+ * |S32 |S32 |S32 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
*
* @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
@@ -161,19 +165,23 @@ public:
CLArithmeticSubtraction &operator=(CLArithmeticSubtraction &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
- * Valid configurations (Input1,Input2) -> Output :
- *
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (S16,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,S16) -> S16
- * - (S32,S32) -> S32
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- * - (QASYMM8,QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16,QSYMM16) -> QSYMM16
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |QSYMM16 |
+ * |U8 |U8 |U8 |
+ * |U8 |U8 |S16 |
+ * |U8 |S16 |S16 |
+ * |S16 |U8 |S16 |
+ * |S16 |S16 |S16 |
+ * |S32 |S32 |S32 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
*
* @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
@@ -267,6 +275,15 @@ public:
CLArithmeticDivision &operator=(CLArithmeticDivision &&);
/** Initialise the kernel's inputs, output.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in, out] input1 First tensor input. Data types supported: F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
* @param[in, out] input2 Second tensor input. Same as @p input1.
@@ -327,6 +344,22 @@ public:
CLElementwiseMax &operator=(CLElementwiseMax &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |QSYMM16 |
+ * |U8 |U8 |U8 |
+ * |S16 |S16 |S16 |
+ * |S32 |S32 |S32 |
+ * |U32 |U32 |U32 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
* @param[in, out] input2 Second tensor input. Data types supported: same as @p input1.
@@ -387,6 +420,22 @@ public:
CLElementwiseMin &operator=(CLElementwiseMin &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |QSYMM16 |
+ * |U8 |U8 |U8 |
+ * |S16 |S16 |S16 |
+ * |S32 |S32 |S32 |
+ * |U32 |U32 |U32 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
* @param[in, out] input2 Second tensor input. Data types supported: same as @p input1.
@@ -447,6 +496,20 @@ public:
CLElementwiseSquaredDiff &operator=(CLElementwiseSquaredDiff &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |QSYMM16 |
+ * |U8 |U8 |U8 |
+ * |S16 |S16 |S16 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
* @param[in, out] input2 Second tensor input. Data types supported: same as @p input1.
@@ -507,6 +570,15 @@ public:
CLElementwisePower &operator=(CLElementwisePower &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in, out] input1 First tensor input. Data types supported: F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
* @param[in, out] input2 Second tensor input. Data types supported: F16/F32.
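A minimal sketch of the mixed-type U8 + U8 -> S16 configuration from the addition table above; the shapes and the WRAP policy are illustrative assumptions:

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        CLTensor a, b, sum;
        a.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U8));
        b.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U8));
        sum.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::S16)); // widened destination

        CLArithmeticAddition add;
        add.configure(&a, &b, &sum, ConvertPolicy::WRAP); // ConvertPolicy::SATURATE clamps on overflow instead

        a.allocator()->allocate();
        b.allocator()->allocate();
        sum.allocator()->allocate();

        add.run();
        CLScheduler::get().sync();
        return 0;
    }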
diff --git a/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h
index 32af0f9427..6e482c98e7 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -93,6 +93,17 @@ public:
~CLGEMMDeconvolutionLayer();
/** Set the input, weights, biases and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:--------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ *
* @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.
* Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. Data layout supported: NHWC
* @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input. Data layout supported: same as @p input.
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h
index 0f051ecffd..a60992a0f4 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h
@@ -288,10 +288,20 @@ class CLGEMMLowpOutputStage : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs, output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:-------------|:-------------|
+ * |S32 |S32 |QASYMM8 |
+ * |S32 |S32 |QASYMM8_SIGNED|
+ * |S32 |S32 |QSYMM16 |
+ *
* @param[in] input Input tensor. Data type supported: S32
* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM16
* @param[in] info GEMMLowp output stage metadata.
*/
void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info);
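A sketch of an S32 -> QASYMM8 requantization through this stage; the fixed-point multiplier/shift, offset, and bounds are illustrative values, and the bias is omitted (a nullptr is allowed per the docs above):

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        CLTensor acc, q8;
        acc.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::S32));
        q8.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::QASYMM8));

        GEMMLowpOutputStageInfo info{};
        info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
        info.gemmlowp_multiplier = 1073741824; // illustrative requantization multiplier (Q0.31 fixed point)
        info.gemmlowp_shift      = 1;          // illustrative right shift
        info.gemmlowp_offset     = 10;         // illustrative output zero point
        info.gemmlowp_min_bound  = 0;
        info.gemmlowp_max_bound  = 255;
        info.output_data_type    = DataType::QASYMM8;

        CLGEMMLowpOutputStage stage;
        stage.configure(&acc, nullptr, &q8, info); // bias tensor omitted

        acc.allocator()->allocate();
        q8.allocator()->allocate();

        stage.run();
        CLScheduler::get().sync();
        return 0;
    }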
diff --git a/arm_compute/runtime/CL/functions/CLLogicalAnd.h b/arm_compute/runtime/CL/functions/CLLogicalAnd.h
index f7038ee97a..61a15816eb 100644
--- a/arm_compute/runtime/CL/functions/CLLogicalAnd.h
+++ b/arm_compute/runtime/CL/functions/CLLogicalAnd.h
@@ -87,6 +87,14 @@ public:
CLLogicalAnd &operator=(CLLogicalAnd &&);
/** Initialize the function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:-------------|:------------|
+ * |U8 |U8 |U8 |
+ *
* @param[in] input1 Input tensor. Data types supported: U8.
* @param[in] input2 Input tensor. Data types supported: same as @p input1.
* @param[out] output Output tensor. Data types supported: same as @p input1.
diff --git a/arm_compute/runtime/CL/functions/CLLogicalNot.h b/arm_compute/runtime/CL/functions/CLLogicalNot.h
index 772f16b942..27fd0f9c9f 100644
--- a/arm_compute/runtime/CL/functions/CLLogicalNot.h
+++ b/arm_compute/runtime/CL/functions/CLLogicalNot.h
@@ -58,6 +58,14 @@ public:
CLLogicalNot &operator=(CLLogicalNot &&);
/** Initialize the function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:------------|
+ * |U8 |U8 |
+ *
* @param[in] input Input tensor. Data types supported: U8.
* @param[out] output Output tensor. Data types supported: same as @p input.
*/
diff --git a/arm_compute/runtime/CL/functions/CLLogicalOr.h b/arm_compute/runtime/CL/functions/CLLogicalOr.h
index 948baee9d9..b9ffb4a449 100644
--- a/arm_compute/runtime/CL/functions/CLLogicalOr.h
+++ b/arm_compute/runtime/CL/functions/CLLogicalOr.h
@@ -87,6 +87,14 @@ public:
CLLogicalOr &operator=(CLLogicalOr &&);
/** Initialize the function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:-------------|:------------|
+ * |U8 |U8 |U8 |
+ *
* @param[in] input1 Input tensor. Data types supported: U8.
* @param[in] input2 Input tensor. Data types supported: same as @p input1.
* @param[out] output Output tensor. Data types supported: same as @p input1.
diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
index ddb35ae56f..721a47144e 100644
--- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
@@ -60,6 +60,17 @@ public:
~CLSoftmaxLayerGeneric();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
* @param[out] output Destination tensor. Data types supported: same as @p input
* @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f
diff --git a/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h b/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h
index 8cd809cc1f..d644591b57 100644
--- a/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h
+++ b/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,6 +41,16 @@ class CLWinogradInputTransform : public ICLSimpleFunction
public:
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note Winograd input transform supports the following configurations for NCWH data layout
* F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
* F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
index 734e3502dd..b8e46ff36e 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
@@ -52,8 +52,7 @@ public:
/** Initialise the kernel's inputs, output and conversion policy.
*
* Valid data layouts:
- * - NHWC
- * - NCHW
+ * - All
*
* Valid data type configurations:
* |src0 |src1 |dst |
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
index c741db3223..0c72e946f6 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
@@ -57,6 +57,25 @@ public:
NEArithmeticSubtraction &operator=(NEArithmeticSubtraction &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |QSYMM16 |
+ * |QSYMM16 |QSYMM16 |S32 |
+ * |U8 |U8 |U8 |
+ * |U8 |U8 |S16 |
+ * |U8 |S16 |S16 |
+ * |S16 |U8 |S16 |
+ * |S16 |S16 |S16 |
+ * |S32 |S32 |S32 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
* @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
* @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
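The Neon side mirrors the CL API but runs synchronously on the CPU; a minimal F32 sketch with illustrative shapes and the SATURATE policy (the policy only affects integer types):

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/Tensor.h"
    #include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"

    using namespace arm_compute;

    int main()
    {
        Tensor a, b, diff;
        a.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
        b.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
        diff.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));

        NEArithmeticSubtraction sub;
        sub.configure(&a, &b, &diff, ConvertPolicy::SATURATE);

        a.allocator()->allocate();
        b.allocator()->allocate();
        diff.allocator()->allocate();

        sub.run(); // blocking call on CPU
        return 0;
    }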
diff --git a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h
index d5c1f0ab6f..2affa8d49e 100644
--- a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -57,6 +57,16 @@ public:
~NEDetectionPostProcessLayer() = default;
/** Configure the detection output layer NE function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 - src2 |dst0 - dst3 |
+ * |:--------------|:--------------|
+ * |QASYMM8 |F32 |
+ * |QASYMM8_SIGNED |F32 |
+ * |F32 |F32 |
+ *
* @param[in] input_box_encoding The bounding box input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32.
* @param[in] input_score The class prediction input tensor. Data types supported: same as @p input_box_encoding.
* @param[in] input_anchors The anchors input tensor. Data types supported: same as @p input_box_encoding.
diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
index 44b70bbe85..95274bdb0c 100644
--- a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
+++ b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
@@ -54,6 +54,19 @@ public:
NEElementwiseMax &operator=(NEElementwiseMax &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |S32 |S32 |S32 |
+ * |S16 |S16 |S16 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[out] output Output tensor. Data types supported: Same as @p input1.
@@ -101,6 +114,19 @@ public:
NEElementwiseMin &operator=(NEElementwiseMin &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |S32 |S32 |S32 |
+ * |S16 |S16 |S16 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[out] output Output tensor. Data types supported: Same as @p input1.
@@ -148,6 +174,19 @@ public:
NEElementwiseSquaredDiff &operator=(NEElementwiseSquaredDiff &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |S32 |S32 |S32 |
+ * |S16 |S16 |S16 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[out] output Output tensor. Data types supported: Same as @p input1.
@@ -195,6 +234,15 @@ public:
NEElementwiseDivision &operator=(NEElementwiseDivision &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in, out] input1 First tensor input. Data types supported: F16/F32.
* @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[out] output Output tensor. Data types supported: Same as @p input1.
@@ -243,6 +291,15 @@ public:
NEElementwisePower &operator=(NEElementwisePower &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in, out] input1 First tensor input. Data types supported: F16/F32.
* @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[out] output Output tensor. Data types supported: Same as @p input1.
@@ -290,6 +347,20 @@ public:
NEElementwiseComparison &operator=(NEElementwiseComparison &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:-----|
+ * |QASYMM8 |QASYMM8 |U8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |U8 |
+ * |S32 |S32 |U8 |
+ * |U8 |U8 |U8 |
+ * |S16 |S16 |U8 |
+ * |F16 |F16 |U8 |
+ * |F32 |F32 |U8 |
+ *
* @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
* @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[out] output Output tensor. Data types supported: U8.
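A minimal sketch of NEElementwiseMax that also exercises the dimension-0 broadcasting the @param notes refer to; the shapes are illustrative assumptions:

    #include "arm_compute/runtime/Tensor.h"
    #include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h"

    using namespace arm_compute;

    int main()
    {
        Tensor x, row, y;
        x.allocator()->init(TensorInfo(TensorShape(16U, 8U), 1, DataType::F32));  // 16 columns, 8 rows
        row.allocator()->init(TensorInfo(TensorShape(1U, 8U), 1, DataType::F32)); // broadcast along dimension 0
        y.allocator()->init(TensorInfo(TensorShape(16U, 8U), 1, DataType::F32));

        NEElementwiseMax vmax; // NEElementwiseMin, NEElementwiseSquaredDiff, etc. configure the same way
        vmax.configure(&x, &row, &y);

        x.allocator()->allocate();
        row.allocator()->allocate();
        y.allocator()->allocate();

        vmax.run();
        return 0;
    }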
diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h
index 4786f71cf8..63e47b8377 100644
--- a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h
@@ -54,6 +54,16 @@ public:
/** Initialize the function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ * |S32 |S32 |
+ *
* @param[in] input Input tensor. Data types supported: F16/F32 (F16/F32/S32 for the NEG/ABS operations).
* @param[out] output Output tensor. Data types supported: Same as @p input.
*/
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
index 8c3ba4f0c8..b2ffd038de 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
@@ -65,6 +65,18 @@ public:
~NEGEMMConv2d();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 |
+ *
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
index c22ed1b5c4..fa5f5e3826 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
@@ -282,6 +282,16 @@ public:
~NEGEMMLowpOutputStage();
/** Initialise the kernel's inputs, output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:-------------|:-------------|
+ * |S32 |S32 |QASYMM8 |
+ * |S32 |S32 |QASYMM8_SIGNED|
+ * |S32 |S32 |QSYMM16 |
+ *
* @param[in] input Input tensor. Data type supported: S32
* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
diff --git a/arm_compute/runtime/NEON/functions/NELogical.h b/arm_compute/runtime/NEON/functions/NELogical.h
index 04ffce6221..5cf5336f4f 100644
--- a/arm_compute/runtime/NEON/functions/NELogical.h
+++ b/arm_compute/runtime/NEON/functions/NELogical.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -48,6 +48,14 @@ public:
/** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:-------------|:------------|
+ * |U8 |U8 |U8 |
+ *
* @param[in] input1 First tensor input. Data type supported: U8.
* @param[in] input2 Second tensor input. Data type supported: U8.
* @param[out] output Output tensor. Data type supported: U8.
@@ -83,6 +91,14 @@ public:
/** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:-------------|:------------|
+ * |U8 |U8 |U8 |
+ *
* @param[in] input1 First tensor input. Data type supported: U8.
* @param[in] input2 Second tensor input. Data type supported: U8.
* @param[out] output Output tensor. Data type supported: U8.
@@ -118,6 +134,14 @@ public:
/** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:-------------|
+ * |U8 |U8 |
+ *
* @param[in] input Input tensor. Data type supported: U8.
* @param[out] output Output tensor. Data type supported: U8.
*/
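A minimal sketch covering the three logical functions above, using NELogicalAnd; all tensors are U8, and the shapes are illustrative assumptions:

    #include "arm_compute/runtime/Tensor.h"
    #include "arm_compute/runtime/NEON/functions/NELogical.h"

    using namespace arm_compute;

    int main()
    {
        Tensor a, b, out;
        a.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::U8));
        b.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::U8));
        out.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::U8));

        NELogicalAnd land; // NELogicalOr is configured identically; NELogicalNot takes a single input
        land.configure(&a, &b, &out);

        a.allocator()->allocate();
        b.allocator()->allocate();
        out.allocator()->allocate();

        land.run();
        return 0;
    }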
diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
index 8a2ae10129..efe959f14e 100644
--- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
@@ -52,6 +52,17 @@ public:
~NESoftmaxLayerGeneric();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in,out] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. If the width is not a
* multiple of the internal processing block size, @ref NEFillBorder replicates the
* last value of each row to the nearest multiple.
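A minimal sketch, assuming an F32 2D input with classes along dimension 0 and the default beta; NELogSoftmaxLayer shares the same generic interface:

    #include "arm_compute/runtime/Tensor.h"
    #include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"

    using namespace arm_compute;

    int main()
    {
        Tensor logits, probs;
        logits.allocator()->init(TensorInfo(TensorShape(1000U, 4U), 1, DataType::F32)); // 1000 classes, batch of 4
        probs.allocator()->init(TensorInfo(TensorShape(1000U, 4U), 1, DataType::F32));

        NESoftmaxLayer softmax;
        softmax.configure(&logits, &probs, 1.0f /* beta */, 0 /* axis */);

        logits.allocator()->allocate();
        probs.allocator()->allocate();

        softmax.run();
        return 0;
    }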
diff --git a/arm_compute/runtime/OperatorList.h b/arm_compute/runtime/OperatorList.h
index a659a79423..e1c2bed41d 100644
--- a/arm_compute/runtime/OperatorList.h
+++ b/arm_compute/runtime/OperatorList.h
@@ -51,7 +51,7 @@
*
*/
-/** ArithmeticAddition (no CL)
+/** ArithmeticAddition
*
* Description:
* Function to add 2 tensors.
@@ -61,7 +61,7 @@
*
*/
-/** ArithmeticSubtraction (no CL)
+/** ArithmeticSubtraction
*
* Description:
* Function to subtract 2 tensors.
@@ -161,7 +161,7 @@
*
*/
-/** Comparison (only CL)
+/** Comparison
*
* Description:
* Function to compare 2 tensors.
@@ -216,7 +216,7 @@
*
*/
-/** Crop (only CL)
+/** Crop
*
* Description:
* Performs a copy of input tensor to the output tensor.
@@ -246,7 +246,7 @@
*
*/
-/** DeconvolutionLayerUpsample (only CL)
+/** DeconvolutionLayerUpsample
*
* Description:
* Function to execute deconvolution upsample on OpenCL.
@@ -296,7 +296,7 @@
*
*/
-/** DetectionPostProcessLayer (no CL)
+/** DetectionPostProcessLayer
*
* Description:
* Function to generate the detection output based on center size encoded boxes, class prediction and anchors by doing non maximum suppression (NMS).
@@ -316,7 +316,7 @@
*
*/
-/** DirectDeconvolutionLayer (only CL)
+/** DirectDeconvolutionLayer
*
* Description:
* Function to run the deconvolution layer.
@@ -326,7 +326,7 @@
*
*/
-/** ElementWiseOperations (skip)
+/** ElementWiseOperations
*
* Description:
* Function to perform in Cpu:
@@ -336,7 +336,6 @@
* - Pow
* - SquaredDiff
* - Comparisons (Equal, greater, greater_equal, less, less_equal, not_equal)
- *
* Function to perform in CL:
* - Add
* - Sub
@@ -351,18 +350,18 @@
* ANEURALNETWORKS_MINIMUM
* ANEURALNETWORKS_POW
* ANEURALNETWORKS_DIV
- * ANEURALNETWORKS_ADD (only CL)
- * ANEURALNETWORKS_SUB (only CL)
- * ANEURALNETWORKS_EQUAL (no CL)
- * ANEURALNETWORKS_GREATER (no CL)
- * ANEURALNETWORKS_GREATER_EQUAL (no CL)
- * ANEURALNETWORKS_LESS (no CL)
- * ANEURALNETWORKS_LESS_EQUAL (no CL)
- * ANEURALNETWORKS_NOT_EQUAL (no CL)
+ * ANEURALNETWORKS_ADD
+ * ANEURALNETWORKS_SUB
+ * ANEURALNETWORKS_EQUAL
+ * ANEURALNETWORKS_GREATER
+ * ANEURALNETWORKS_GREATER_EQUAL
+ * ANEURALNETWORKS_LESS
+ * ANEURALNETWORKS_LESS_EQUAL
+ * ANEURALNETWORKS_NOT_EQUAL
*
*/
-/** ElementWiseOperationUnary (skip)
+/** ElementwiseUnaryLayer
*
* Description:
* Function to perform:
@@ -494,7 +493,7 @@
*
*/
-/** GEMMConv2D (no CL)
+/** GEMMConv2D
*
* Description:
* General Matrix Multiplication.
@@ -514,7 +513,7 @@
*
*/
-/** GEMMDeconvolutionLayer (only CL)
+/** GEMMDeconvolutionLayer
*
* Description:
* General Matrix Multiplication.
@@ -574,7 +573,7 @@
*
*/
-/** Logical (no CL)
+/** Logical
*
* Description:
* Function to perform:
@@ -587,7 +586,7 @@
*
*/
-/** LogicalAnd (only CL)
+/** LogicalAnd
*
* Description:
* Function to perform Logical AND.
@@ -597,7 +596,7 @@
*
*/
-/** LogicalOr (only CL)
+/** LogicalOr
*
* Description:
* Function to perform Logical OR.
@@ -607,7 +606,7 @@
*
*/
-/** LogicalNot (only CL)
+/** LogicalNot
*
* Description:
* Function to perform Logical NOT.
@@ -724,7 +723,7 @@
/** PriorBoxLayer
*
* Description:
- * Function to .
+ * Function to compute prior boxes and clip.
*
* Equivalent Android NNAPI Op:
* n/a
@@ -889,7 +888,7 @@
*
*/
-/** SoftmaxLayer (skip)
+/** SoftmaxLayer
*
* Description:
* Function to compute a SoftmaxLayer and a Log SoftmaxLayer.
@@ -990,7 +989,7 @@
*
*/
-/** WinogradInputTransform (only CL)
+/** WinogradInputTransform
*
* Description:
* Function to perform the Winograd input transform.
diff --git a/docs/09_operators_list.dox b/docs/09_operators_list.dox
index 244f292f82..fc41265738 100644
--- a/docs/09_operators_list.dox
+++ b/docs/09_operators_list.dox
@@ -145,6 +145,62 @@ where N = batches, C = channels, H = height, W = width
<tr><td>F32<td>U32, S32
</table>
<tr>
+ <td rowspan="1">ArithmeticAddition
+ <td rowspan="1" style="width:200px;"> Function to add 2 tensors.
+ <td rowspan="1">
+ <ul>
+ <li>ANEURALNETWORKS_ADD
+ </ul>
+ <td>NEArithmeticAddition
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>QASYMM8<td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>QSYMM16<td>QSYMM16<td>QSYMM16
+ <tr><td>QSYMM16<td>QSYMM16<td>S32
+ <tr><td>U8<td>U8<td>U8
+ <tr><td>U8<td>U8<td>S16
+ <tr><td>U8<td>S16<td>S16
+ <tr><td>S16<td>U8<td>S16
+ <tr><td>S16<td>S16<td>S16
+ <tr><td>S32<td>S32<td>S32
+ <tr><td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td rowspan="1">ArithmeticSubtraction
+ <td rowspan="1" style="width:200px;"> Function to subtract 2 tensors.
+ <td rowspan="1">
+ <ul>
+ <li>ANEURALNETWORKS_SUB
+ </ul>
+ <td>NEArithmeticSubtraction
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>QASYMM8<td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>QSYMM16<td>QSYMM16<td>QSYMM16
+ <tr><td>QSYMM16<td>QSYMM16<td>S32
+ <tr><td>U8<td>U8<td>U8
+ <tr><td>U8<td>U8<td>S16
+ <tr><td>U8<td>S16<td>S16
+ <tr><td>S16<td>U8<td>S16
+ <tr><td>S16<td>S16<td>S16
+ <tr><td>S32<td>S32<td>S32
+ <tr><td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32
+ </table>
+<tr>
<td rowspan="2">BatchNormalizationLayer
<td rowspan="2" style="width:200px;"> Function to perform batch normalization.
<td rowspan="2">
@@ -422,6 +478,28 @@ where N = batches, C = channels, H = height, W = width
<tr><td>All<td>All
</table>
<tr>
+ <td rowspan="1">Comparison
+ <td rowspan="1" style="width:200px;"> Function to compare 2 tensors.
+ <td rowspan="1">
+ <ul>
+ <li>ANEURALNETWORKS_EQUAL
+ <li>ANEURALNETWORKS_GREATER
+ <li>ANEURALNETWORKS_GREATER_EQUAL
+ <li>ANEURALNETWORKS_LESS
+ <li>ANEURALNETWORKS_LESS_EQUAL
+ <li>ANEURALNETWORKS_NOT_EQUAL
+ </ul>
+ <td>CLComparison
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>All<td>All<td>U8
+ </table>
+<tr>
<td rowspan="2">ConcatenateLayer
<td rowspan="2" style="width:200px;"> Function to concatenate tensors along a given axis.
<td rowspan="2">
@@ -554,6 +632,23 @@ where N = batches, C = channels, H = height, W = width
<tr><td>All<td>All
</table>
<tr>
+ <td rowspan="1">Crop
+ <td rowspan="1" style="width:200px;"> Performs a copy of input tensor to the output tensor.
+ <td rowspan="1">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>CLCrop
+ <td>
+ <ul>
+ <li>NHWC
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>F32
+ </table>
+<tr>
<td rowspan="2">CropResize
<td rowspan="2" style="width:200px;"> Function to perform cropping and resizing.
<td rowspan="2">
@@ -622,6 +717,24 @@ where N = batches, C = channels, H = height, W = width
<tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED
</table>
<tr>
+ <td rowspan="1">DeconvolutionLayerUpsample
+ <td rowspan="1" style="width:200px;"> Function to execute deconvolution upsample on OpenCL.
+ <td rowspan="1">
+ <ul>
+ <li>ANEURALNETWORKS_TRANSPOSE_CONV_2D
+ </ul>
+ <td>CLDeconvolutionLayerUpsample
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
<td rowspan="2">DepthConvertLayer
<td rowspan="2" style="width:200px;"> Performs a down-scaling depth conversion.
<td rowspan="2">
@@ -768,6 +881,25 @@ where N = batches, C = channels, H = height, W = width
<tr><td>QSYMM16<td>F16, F32
</table>
<tr>
+ <td rowspan="1">DetectionPostProcessLayer
+ <td rowspan="1" style="width:200px;"> Function to generate the detection output based on center size encoded boxes, class prediction and anchors by doing non maximum suppression (NMS).
+ <td rowspan="1">
+ <ul>
+ <li>ANEURALNETWORKS_DETECTION_POSTPROCESSING
+ </ul>
+ <td>NEDetectionPostProcessLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0 - src2<th>dst0 - dst3
+ <tr><td>QASYMM8<td>F32
+ <tr><td>QASYMM8_SIGNED<td>F32
+ <tr><td>F32<td>F32
+ </table>
+<tr>
<td rowspan="2">DirectConvolutionLayer
<td rowspan="2" style="width:200px;"> Function to compute direct convolution.
<td rowspan="2">
@@ -802,6 +934,364 @@ where N = batches, C = channels, H = height, W = width
<tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
</table>
<tr>
+ <td rowspan="1">DirectDeconvolutionLayer
+ <td rowspan="1" style="width:200px;"> Function to run the deconvolution layer.
+ <td rowspan="1">
+ <ul>
+ <li>ANEURALNETWORKS_TRANSPOSE_CONV_2D
+ </ul>
+ <td>CLDirectDeconvolutionLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32
+ <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED
+ </table>
+<tr>
+ <td rowspan="13">ElementWiseOperations
+ <td rowspan="13" style="width:200px;"> Function to perform in Cpu: - Div - Max - Min - Pow - SquaredDiff - Comparisons (Equal, greater, greater_equal, less, less_equal, not_equal) Function to perform in CL: - Add - Sub - Div - Max - Min - Pow - SquaredDiff
+ <td rowspan="13">
+ <ul>
+ <li>ANEURALNETWORKS_MAXIMUM
+ <li>ANEURALNETWORKS_MINIMUM
+ <li>ANEURALNETWORKS_POW
+ <li>ANEURALNETWORKS_DIV
+ <li>ANEURALNETWORKS_ADD
+ <li>ANEURALNETWORKS_SUB
+ <li>ANEURALNETWORKS_EQUAL
+ <li>ANEURALNETWORKS_GREATER
+ <li>ANEURALNETWORKS_GREATER_EQUAL
+ <li>ANEURALNETWORKS_LESS
+ <li>ANEURALNETWORKS_LESS_EQUAL
+ <li>ANEURALNETWORKS_NOT_EQUAL
+ </ul>
+ <td>NEElementwiseMax
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>QASYMM8<td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>S32<td>S32<td>S32
+ <tr><td>S16<td>S16<td>S16
+ <tr><td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td>NEElementwiseMin
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>QASYMM8<td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>S32<td>S32<td>S32
+ <tr><td>S16<td>S16<td>S16
+ <tr><td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td>NEElementwiseSquaredDiff
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>QASYMM8<td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>S32<td>S32<td>S32
+ <tr><td>S16<td>S16<td>S16
+ <tr><td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td>NEElementwiseDivision
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td>NEElementwisePower
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td>NEElementwiseComparison
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>QASYMM8<td>QASYMM8<td>U8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>U8
+ <tr><td>S32<td>S32<td>U8
+ <tr><td>U8<td>U8<td>U8
+ <tr><td>S16<td>S16<td>U8
+ <tr><td>F16<td>F16<td>U8
+ <tr><td>F32<td>F32<td>U8
+ </table>
+<tr>
+ <td>CLArithmeticAddition
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>QASYMM8<td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>QSYMM16<td>QSYMM16<td>QSYMM16
+ <tr><td>U8<td>U8<td>U8
+ <tr><td>U8<td>U8<td>S16
+ <tr><td>U8<td>S16<td>S16
+ <tr><td>S16<td>U8<td>S16
+ <tr><td>S16<td>S16<td>S16
+ <tr><td>S32<td>S32<td>S32
+ <tr><td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td>CLArithmeticSubtraction
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>QASYMM8<td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>QSYMM16<td>QSYMM16<td>QSYMM16
+ <tr><td>U8<td>U8<td>U8
+ <tr><td>U8<td>U8<td>S16
+ <tr><td>U8<td>S16<td>S16
+ <tr><td>S16<td>U8<td>S16
+ <tr><td>S16<td>S16<td>S16
+ <tr><td>S32<td>S32<td>S32
+ <tr><td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td>CLArithmeticDivision
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td>CLElementwiseMax
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>QASYMM8<td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>QSYMM16<td>QSYMM16<td>QSYMM16
+ <tr><td>U8<td>U8<td>U8
+ <tr><td>S16<td>S16<td>S16
+ <tr><td>S32<td>S32<td>S32
+ <tr><td>U32<td>U32<td>U32
+ <tr><td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td>CLElementwiseMin
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>QASYMM8<td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>QSYMM16<td>QSYMM16<td>QSYMM16
+ <tr><td>U8<td>U8<td>U8
+ <tr><td>S16<td>S16<td>S16
+ <tr><td>S32<td>S32<td>S32
+ <tr><td>U32<td>U32<td>U32
+ <tr><td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td>CLElementwiseSquaredDiff
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>QASYMM8<td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>QSYMM16<td>QSYMM16<td>QSYMM16
+ <tr><td>U8<td>U8<td>U8
+ <tr><td>S16<td>S16<td>S16
+ <tr><td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td>CLElementwisePower
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td rowspan="8">ElementwiseUnaryLayer
+ <td rowspan="8" style="width:200px;"> Function to perform: - Rsqrt - Exp - Neg - Log - Abs - Round - Sin
+ <td rowspan="8">
+ <ul>
+ <li>ANEURALNETWORKS_ABS
+ <li>ANEURALNETWORKS_EXP
+ <li>ANEURALNETWORKS_LOG
+ <li>ANEURALNETWORKS_NEG
+ <li>ANEURALNETWORKS_RSQRT
+ <li>ANEURALNETWORKS_SIN
+ </ul>
+ <td>NEElementwiseUnaryLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ <tr><td>S32<td>S32
+ </table>
+<tr>
+ <td>CLRsqrtLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLExpLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLNegLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLSinLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLLogLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLAbsLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLRoundLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
<td rowspan="2">FFT1D
<td rowspan="2" style="width:200px;"> Fast Fourier Transform 1D.
<td rowspan="2">
@@ -1009,7 +1499,7 @@ where N = batches, C = channels, H = height, W = width
<ul>
<li>ANEURALNETWORKS_FULLY_CONNECTED
</ul>
- <td>NEFullyConnectedLayerReshapeWeightsManaged
+ <td>NEFullyConnectedLayer
<td>
<ul>
<li>NHWC
@@ -1024,7 +1514,7 @@ where N = batches, C = channels, H = height, W = width
<tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
</table>
<tr>
- <td>CLFullyConnectedLayerReshapeWeightsManaged
+ <td>CLFullyConnectedLayer
<td>
<ul>
<li>NHWC
@@ -1118,7 +1608,7 @@ where N = batches, C = channels, H = height, W = width
<tr><td>BFLOAT16<td>BFLOAT16<td>BFLOAT16<td>BFLOAT16
</table>
<tr>
- <td>CLGEMMReshapeRHSMatrixKernelManaged
+ <td>CLGEMM
<td>
<ul>
<li>All
@@ -1130,13 +1620,34 @@ where N = batches, C = channels, H = height, W = width
<tr><td>F16<td>F16<td>F16<td>F16
</table>
<tr>
+ <td rowspan="1">GEMMConv2D
+ <td rowspan="1" style="width:200px;"> General Matrix Multiplication.
+ <td rowspan="1">
+ <ul>
+ <li>ANEURALNETWORKS_CONV_2D
+ </ul>
+ <td>NEGEMMConv2d
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32
+ <tr><td>BFLOAT16<td>BFLOAT16<td>BFLOAT16<td>BFLOAT16
+ </table>
+<tr>
<td rowspan="2">GEMMConvolutionLayer
<td rowspan="2" style="width:200px;"> General Matrix Multiplication.
<td rowspan="2">
<ul>
<li>ANEURALNETWORKS_CONV_2D
</ul>
- <td>NEConvolutionLayerReshapeWeights
+ <td>NEGEMMConvolutionLayer
<td>
<ul>
<li>NHWC
@@ -1154,7 +1665,7 @@ where N = batches, C = channels, H = height, W = width
<tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED
</table>
<tr>
- <td>CLConvolutionLayerReshapeWeights
+ <td>CLGEMMConvolutionLayer
<td>
<ul>
<li>NHWC
@@ -1171,6 +1682,26 @@ where N = batches, C = channels, H = height, W = width
<tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED
</table>
<tr>
+ <td rowspan="1">GEMMDeconvolutionLayer
+ <td rowspan="1" style="width:200px;"> General Matrix Multiplication.
+ <td rowspan="1">
+ <ul>
+ <li>ANEURALNETWORKS_TRANSPOSE_CONV_2D
+ </ul>
+ <td>CLGEMMDeconvolutionLayer
+ <td>
+ <ul>
+ <li>NHWC
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32
+ <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
+ </table>
+<tr>
<td rowspan="2">GEMMLowpMatrixMultiplyCore
<td rowspan="2" style="width:200px;"> General Matrix Multiplication.
<td rowspan="2">
@@ -1223,6 +1754,38 @@ where N = batches, C = channels, H = height, W = width
<tr><td>QASYMM8_SIGNED<td>QSYMM8<td>S32<td>S32
</table>
<tr>
+ <td rowspan="2">GEMMLowpOutputStage
+ <td rowspan="2" style="width:200px;"> General Matrix Multiplication.
+ <td rowspan="2">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>NEGEMMLowpOutputStage
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>S32<td>S32<td>QASYMM8
+ <tr><td>S32<td>S32<td>QASYMM8_SIGNED
+ <tr><td>S32<td>S32<td>QSYMM16
+ </table>
+<tr>
+ <td>CLGEMMLowpOutputStage
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>S32<td>S32<td>QASYMM8
+ <tr><td>S32<td>S32<td>QASYMM8_SIGNED
+ <tr><td>S32<td>S32<td>QSYMM16
+ </table>
+<tr>
<td rowspan="2">GenerateProposalsLayer
<td rowspan="2" style="width:200px;"> Function to generate proposals for a RPN (Region Proposal Network).
<td rowspan="2">
@@ -1319,6 +1882,96 @@ where N = batches, C = channels, H = height, W = width
<tr><td>F32<td>F32
</table>
<tr>
+ <td rowspan="3">Logical
+ <td rowspan="3" style="width:200px;"> Function to perform: - Logical AND - Logical OR - Logical NOT
+ <td rowspan="3">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>NELogicalAnd
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>U8<td>U8<td>U8
+ </table>
+<tr>
+ <td>NELogicalOr
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>U8<td>U8<td>U8
+ </table>
+<tr>
+ <td>NELogicalNot
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>U8<td>U8
+ </table>
+<tr>
+ <td rowspan="1">LogicalAnd
+ <td rowspan="1" style="width:200px;"> Function to perform Logical AND.
+ <td rowspan="1">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>CLLogicalAnd
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>U8<td>U8<td>U8
+ </table>
+<tr>
+ <td rowspan="1">LogicalOr
+ <td rowspan="1" style="width:200px;"> Function to perform Logical OR.
+ <td rowspan="1">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>CLLogicalOr
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>U8<td>U8<td>U8
+ </table>
+<tr>
+ <td rowspan="1">LogicalNot
+ <td rowspan="1" style="width:200px;"> Function to perform Logical NOT.
+ <td rowspan="1">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>CLLogicalNot
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>U8<td>U8
+ </table>
+<tr>
<td rowspan="2">LSTMLayer
<td rowspan="2" style="width:200px;"> Function to perform a single time step in a Long Short-Term Memory (LSTM) layer.
<td rowspan="2">
@@ -1660,7 +2313,7 @@ where N = batches, C = channels, H = height, W = width
</table>
<tr>
<td rowspan="2">PriorBoxLayer
- <td rowspan="2" style="width:200px;"> Function to .
+ <td rowspan="2" style="width:200px;"> Function to compute prior boxes and clip.
<td rowspan="2">
<ul>
<li>n/a
@@ -2151,6 +2804,41 @@ where N = batches, C = channels, H = height, W = width
<tr><td>All<td>All
</table>
<tr>
+ <td rowspan="2">SoftmaxLayer
+ <td rowspan="2" style="width:200px;"> Function to compute a SoftmaxLayer and a Log SoftmaxLayer.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_LOG_SOFTMAX
+ <li>ANEURALNETWORKS_SOFTMAX
+ </ul>
+ <td>NESoftmaxLayerGeneric
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLSoftmaxLayerGeneric
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
<td rowspan="2">SpaceToBatchLayer
<td rowspan="2" style="width:200px;"> Function to divide a tensor spatially.
<td rowspan="2">
@@ -2410,6 +3098,25 @@ where N = batches, C = channels, H = height, W = width
<tr><td>F16<td>F16<td>F16<td>F16
<tr><td>F32<td>F32<td>F32<td>F32
</table>
+<tr>
+ <td rowspan="1">WinogradInputTransform
+ <td rowspan="1" style="width:200px;"> Function to perform the Winograd input transform.
+ <td rowspan="1">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>CLWinogradInputTransform
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
</table>
*/