diff options
Diffstat (limited to 'arm_compute/runtime/NEON')
22 files changed, 243 insertions, 14 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h index ffda8406aa..b39a8d7701 100644 --- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h @@ -62,6 +62,18 @@ public: /** [NEActivationLayer snippet] **/ /** Set the input and output tensor. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 | + * |F16 |F16 | + * |F32 |F32 | + * * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place * * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h index 8f9fd27906..734e3502dd 100644 --- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h +++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h @@ -51,19 +51,25 @@ public: NEArithmeticAddition &operator=(NEArithmeticAddition &&); /** Initialise the kernel's inputs, output and conversion policy. 
* - * Valid configurations (Input1,Input2) -> Output : + * Valid data layouts: + * - NHWC + * - NCHW * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (S32,S32) -> S32 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - (QASYMM8,QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (QSYMM16,QSYMM16) -> QSYMM16 + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 |QSYMM16 | + * |QSYMM16 |QSYMM16 |S32 | + * |U8 |U8 |U8 | + * |U8 |U8 |S16 | + * |U8 |S16 |S16 | + * |S16 |U8 |S16 | + * |S16 |S16 |S16 | + * |S32 |S32 |S32 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | * * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h index 6aa724ab0c..dd1c709d76 100644 --- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h @@ -55,6 +55,17 @@ public: NEConcatenateLayer &operator=(NEConcatenateLayer &&); /** Initialise the kernel's inputs vector and output. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * * @note Input and output tensor dimensions preconditions differ depending on the concatenation axis. 
* @note Preconditions can be found respectively at @ref cpu::kernels::CpuConcatenateWidthKernel, @ref cpu::kernels::CpuConcatenateHeightKernel, * @ref cpu::kernels::CpuConcatenateDepthKernel and @ref cpu::kernels::CpuConcatenateBatchKernel. diff --git a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h index f9ce66db13..218877d421 100644 --- a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h +++ b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h @@ -51,6 +51,15 @@ public: ~NEConvertFullyConnectedWeights(); /** Initialize the function. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). diff --git a/arm_compute/runtime/NEON/functions/NECopy.h b/arm_compute/runtime/NEON/functions/NECopy.h index 56f8bd9803..ee02c259f4 100644 --- a/arm_compute/runtime/NEON/functions/NECopy.h +++ b/arm_compute/runtime/NEON/functions/NECopy.h @@ -53,6 +53,14 @@ public: NECopy &operator=(NECopy &&); /** Initialise the function's source and destination. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @param[in] input Source tensor. Data types supported: All * @param[out] output Output tensor. Data types supported: Same as @p input. 
* diff --git a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h index a18566e6ca..dfec835f45 100644 --- a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h @@ -53,6 +53,23 @@ public: NEDequantizationLayer &operator=(NEDequantizationLayer &&) = default; /** Configure the kernel. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------------------|:--------------| + * |QASYMM8 |F16 | + * |QASYMM8 |F32 | + * |QASYMM8_SIGNED |F16 | + * |QASYMM8_SIGNED |F32 | + * |QSYMM8_PER_CHANNEL |F16 | + * |QSYMM8_PER_CHANNEL |F32 | + * |QSYMM8 |F16 | + * |QSYMM8 |F32 | + * |QSYMM16 |F16 | + * |QSYMM16 |F32 | + * * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32. */ diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h index fc4017e635..82cabed6c9 100644 --- a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h @@ -58,6 +58,16 @@ public: ~NEDirectConvolutionLayer(); /** Set the input, weights, biases and output tensors. 
* + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:------|:------|:------|:------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * * @note: DirectConvolution only works in the following configurations: * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F16/F32 * 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F16/F32 diff --git a/arm_compute/runtime/NEON/functions/NEFFT1D.h b/arm_compute/runtime/NEON/functions/NEFFT1D.h index a533aa7f48..9654b1e604 100644 --- a/arm_compute/runtime/NEON/functions/NEFFT1D.h +++ b/arm_compute/runtime/NEON/functions/NEFFT1D.h @@ -63,6 +63,14 @@ public: ~NEFFT1D(); /** Initialise the function's source and destinations. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |F32 |F32 | + * * @param[in] input Source tensor. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor). * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. * Number of channels supported: 1 (real tensor) or 2 (complex tensor).If @p input is real, @p output must be complex. diff --git a/arm_compute/runtime/NEON/functions/NEFFT2D.h b/arm_compute/runtime/NEON/functions/NEFFT2D.h index ce84a85105..57f38d1942 100644 --- a/arm_compute/runtime/NEON/functions/NEFFT2D.h +++ b/arm_compute/runtime/NEON/functions/NEFFT2D.h @@ -58,6 +58,14 @@ public: ~NEFFT2D(); /** Initialise the function's source and destinations * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |F32 |F32 | + * * @param[in] input Source tensor. Data types supported: F32. * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. 
* @param[in] config FFT related configuration diff --git a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h index 213fa6093b..c5f4d45b6b 100644 --- a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h @@ -73,6 +73,14 @@ public: ~NEFFTConvolutionLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |F32 |F32 | + * * @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], diff --git a/arm_compute/runtime/NEON/functions/NEFill.h b/arm_compute/runtime/NEON/functions/NEFill.h index ba5d020496..e923ce33e1 100644 --- a/arm_compute/runtime/NEON/functions/NEFill.h +++ b/arm_compute/runtime/NEON/functions/NEFill.h @@ -53,6 +53,14 @@ public: NEFill &operator=(NEFill &&); /** Initialize the function * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |All |All | + * * @param[in,out] tensor Source tensor. Data types supported: All * @param[in] constant_value Constant value to use to fill tensor. */ diff --git a/arm_compute/runtime/NEON/functions/NEFloor.h b/arm_compute/runtime/NEON/functions/NEFloor.h index 9560eb9169..4d47b068db 100644 --- a/arm_compute/runtime/NEON/functions/NEFloor.h +++ b/arm_compute/runtime/NEON/functions/NEFloor.h @@ -54,6 +54,15 @@ public: ~NEFloor(); /** Set the source, destination of the kernel * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |F32 |F32 | + * |F16 |F16 | + * * @param[in] input Source tensor. Data type supported: F16/F32. * @param[out] output Destination tensor. 
Same as @p input */ diff --git a/arm_compute/runtime/NEON/functions/NEPReluLayer.h b/arm_compute/runtime/NEON/functions/NEPReluLayer.h index b07febfe7f..81d5fd162c 100644 --- a/arm_compute/runtime/NEON/functions/NEPReluLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPReluLayer.h @@ -55,6 +55,17 @@ public: NEPReluLayer &operator=(NEPReluLayer &&); /** Set the input and output tensor. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] alpha Source alpha tensor. Data types supported: same of @p input. * @param[out] output Destination tensor. Data type supported: same as @p input diff --git a/arm_compute/runtime/NEON/functions/NEPermute.h b/arm_compute/runtime/NEON/functions/NEPermute.h index 2508458a3d..c863fde0ac 100644 --- a/arm_compute/runtime/NEON/functions/NEPermute.h +++ b/arm_compute/runtime/NEON/functions/NEPermute.h @@ -54,6 +54,15 @@ public: NEPermute &operator=(NEPermute &&) = default; /** Configure the permute function * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |All |All | + * * @note Arbitrary permutation vectors are supported with rank not greater than 4 * * @param[in] input The input tensor to permute. 
Data types supported: All diff --git a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h index 6f4cce3cde..f8074e791a 100644 --- a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h +++ b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h @@ -52,6 +52,24 @@ public: NEPixelWiseMultiplication &operator=(NEPixelWiseMultiplication &&) = default; /** Initialise the kernel's inputs, output and conversion policy. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 |QSYMM16 | + * |QSYMM16 |QSYMM16 |S32 | + * |U8 |U8 |U8 | + * |U8 |U8 |S16 | + * |U8 |S16 |S16 | + * |S16 |U8 |S16 | + * |S16 |S16 |S16 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported. * For all other scale values only round to zero (implemented as round towards minus infinity) is supported. * diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h index cb136ebca9..851dc0ca32 100644 --- a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h @@ -59,6 +59,18 @@ public: ~NEPoolingLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * * @note F16 is supported for pool sizes 2 and 3 only * * @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. 
diff --git a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h index 9e2d9ecf24..a7fadfc7cd 100644 --- a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h @@ -52,6 +52,25 @@ public: NEQuantizationLayer &operator=(NEQuantizationLayer &&) = default; /** Set the input and output tensors. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8 |QASYMM8_SIGNED | + * |QASYMM8 |QASYMM16 | + * |QASYMM8_SIGNED |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QASYMM16 | + * |F16 |QASYMM8 | + * |F16 |QASYMM8_SIGNED | + * |F16 |QASYMM16 | + * |F32 |QASYMM8 | + * |F32 |QASYMM8_SIGNED | + * |F32 |QASYMM16 | + * * @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16 */ diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h index b8c0a841bc..3e6e33f797 100644 --- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h +++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h @@ -52,6 +52,14 @@ public: NEReshapeLayer &operator=(NEReshapeLayer &&); /** Initialise the kernel's inputs and outputs * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |All |All | + * * @param[in] input Input tensor. Data type supported: All * @param[out] output Output tensor. 
Data type supported: Same as @p input */ diff --git a/arm_compute/runtime/NEON/functions/NEScale.h b/arm_compute/runtime/NEON/functions/NEScale.h index 45658a7cd3..233ee2969e 100644 --- a/arm_compute/runtime/NEON/functions/NEScale.h +++ b/arm_compute/runtime/NEON/functions/NEScale.h @@ -48,6 +48,20 @@ public: ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(NEScale); /** Initialize the function's source, destination, interpolation type and border_mode. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * |U8 |U8 | + * |S16 |S16 | + * * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) * @param[out] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. * @param[in] info @ref ScaleKernelInfo to be used for configuration diff --git a/arm_compute/runtime/NEON/functions/NESlice.h b/arm_compute/runtime/NEON/functions/NESlice.h index 28628778cb..214ffa512c 100644 --- a/arm_compute/runtime/NEON/functions/NESlice.h +++ b/arm_compute/runtime/NEON/functions/NESlice.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -89,6 +89,14 @@ public: /** Configure kernel * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |All |All | + * * @note Supported tensor rank: up to 4 * @note Start indices must be non-negative. 0 <= starts[i] * @note End coordinates can be negative, which represents the number of elements before the end of that dimension. 
diff --git a/arm_compute/runtime/NEON/functions/NEStridedSlice.h b/arm_compute/runtime/NEON/functions/NEStridedSlice.h index f9c94f5301..7ba6a52a58 100644 --- a/arm_compute/runtime/NEON/functions/NEStridedSlice.h +++ b/arm_compute/runtime/NEON/functions/NEStridedSlice.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -95,6 +95,14 @@ public: /** Configure kernel * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |All |All | + * * @note Supported tensor rank: up to 4 * * @param[in] input Source tensor. Data type supported: All diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h index 78916f67b7..581fe74309 100644 --- a/arm_compute/runtime/NEON/functions/NETranspose.h +++ b/arm_compute/runtime/NEON/functions/NETranspose.h @@ -54,6 +54,14 @@ public: NETranspose &operator=(NETranspose &&) = default; /** Initialise the kernel's inputs and output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |All |All | + * * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input */ |