From f6f7876e9ee8b58a8a6b335b032d554412fa3983 Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Mon, 6 Jul 2020 11:27:21 +0100 Subject: COMPMID-3532: Align data type support between doxygen and implementation - CL Also removes some unused code. Change-Id: I85687c40999c3cdf9e6fccfcd020b0901a9515fe Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3581 Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- arm_compute/core/CL/CLHelpers.h | 8 - arm_compute/core/CL/CLKernels.h | 1 - arm_compute/core/CL/kernels/CLCannyEdgeKernel.h | 6 +- .../core/CL/kernels/CLChannelCombineKernel.h | 6 +- arm_compute/core/CL/kernels/CLConvolutionKernel.h | 6 +- arm_compute/core/CL/kernels/CLCopyKernel.h | 6 +- .../CLDepthwiseConvolutionLayer3x3NHWCKernel.h | 6 +- .../core/CL/kernels/CLElementwiseOperationKernel.h | 12 +- .../core/CL/kernels/CLGEMMLowpReductionKernel.h | 16 +- .../kernels/CLGEMMMatrixAccumulateBiasesKernel.h | 78 --------- .../core/CL/kernels/CLMagnitudePhaseKernel.h | 12 +- arm_compute/core/CL/kernels/CLPadLayerKernel.h | 5 +- .../CL/kernels/CLPixelWiseMultiplicationKernel.h | 70 +++++--- arm_compute/core/CL/kernels/CLReorgLayerKernel.h | 4 +- arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h | 93 +---------- .../core/CL/kernels/CLUpsampleLayerKernel.h | 6 +- arm_compute/runtime/CL/functions/CLComparison.h | 6 +- arm_compute/runtime/CL/functions/CLCopy.h | 6 +- .../runtime/CL/functions/CLElementwiseOperations.h | 178 +++++++++++++++------ arm_compute/runtime/CL/functions/CLFill.h | 4 +- arm_compute/runtime/CL/functions/CLFillBorder.h | 4 +- arm_compute/runtime/CL/functions/CLGEMM.h | 4 +- .../runtime/CL/functions/CLGEMMConvolutionLayer.h | 46 +++--- .../CL/functions/CLGEMMLowpMatrixMultiplyCore.h | 8 +- .../CL/functions/CLPixelWiseMultiplication.h | 71 +++++--- .../runtime/CL/functions/CLReductionOperation.h | 11 +- arm_compute/runtime/CL/functions/CLSoftmaxLayer.h | 38 +++-- arm_compute/runtime/CL/functions/CLUnstack.h | 12 +- arm_compute/runtime/CL/functions/CLUpsampleLayer.h | 8 +- 29 files changed, 345 insertions(+), 386 deletions(-) delete mode 100644 arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h index f3d64155a6..cf18e16e34 100644 --- a/arm_compute/core/CL/CLHelpers.h +++ b/arm_compute/core/CL/CLHelpers.h @@ -97,14 +97,6 @@ std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt); */ std::string get_data_size_from_data_type(const DataType &dt); -/** Translates fixed point tensor data type to the underlying OpenCL type. - * - * @param[in] dt @ref DataType to be translated to OpenCL type. - * - * @return The string specifying the underlying OpenCL type to be used. 
- */ -std::string get_underlying_cl_type_from_data_type(const DataType &dt); - /** Helper function to get the GPU target from CL device * * @param[in] device A CL device diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h index ce3b325546..253df597e0 100644 --- a/arm_compute/core/CL/CLKernels.h +++ b/arm_compute/core/CL/CLKernels.h @@ -84,7 +84,6 @@ #include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" diff --git a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h index 5aec25259f..c4d0297aef 100644 --- a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h +++ b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h @@ -26,8 +26,6 @@ #include "arm_compute/core/CL/ICLKernel.h" -#include - namespace arm_compute { class ICLTensor; @@ -136,7 +134,7 @@ public: CLEdgeTraceKernel &operator=(const CLEdgeTraceKernel &) = delete; /** Initialise the kernel's source, destination and border mode. * - * @param[in] input Source tensor. Data types supported: U8. + * @param[in] input Source tensor. Data types supported: U16/U32. * @param[out] output Destination tensor. Data types supported: U8. * @param[in] upper_thr Upper threshold used for the hysteresis * @param[in] lower_thr Lower threshold used for the hysteresis @@ -154,7 +152,7 @@ public: /** Initialise the kernel's source, destination and border mode. * * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. + * @param[in] input Source tensor. Data types supported: U16/U32. * @param[out] output Destination tensor. Data types supported: U8. * @param[in] upper_thr Upper threshold used for the hysteresis * @param[in] lower_thr Lower threshold used for the hysteresis diff --git a/arm_compute/core/CL/kernels/CLChannelCombineKernel.h b/arm_compute/core/CL/kernels/CLChannelCombineKernel.h index 3ba426cbfa..f9c33df7c1 100644 --- a/arm_compute/core/CL/kernels/CLChannelCombineKernel.h +++ b/arm_compute/core/CL/kernels/CLChannelCombineKernel.h @@ -57,7 +57,7 @@ public: * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format. - * @param[out] output The single planar output tensor. + * @param[out] output The single planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422. */ void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output); /** Configure function's inputs and outputs. @@ -75,7 +75,7 @@ public: * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. - * @param[out] output The multi planar output tensor. + * @param[out] output The multi planar output tensor. 
Supported formats: RGB888/RGBA8888/YUYV422/UYVY422. */ void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output); /** Configure function's inputs and outputs. @@ -84,7 +84,7 @@ public: * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. - * @param[out] output The multi planar output tensor. + * @param[out] output The multi planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422. */ void configure(const CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output); diff --git a/arm_compute/core/CL/kernels/CLConvolutionKernel.h b/arm_compute/core/CL/kernels/CLConvolutionKernel.h index 4bf7c754b0..0f500fb63a 100644 --- a/arm_compute/core/CL/kernels/CLConvolutionKernel.h +++ b/arm_compute/core/CL/kernels/CLConvolutionKernel.h @@ -108,7 +108,7 @@ public: * * @param[in] compile_context The compile context to be used. * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: S16. + * @param[out] output Destination tensor, Data types supported: U16/S16/S32. * @param[in] conv Convolution matrix to apply to the input tensor. * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ @@ -135,7 +135,7 @@ class CLSeparableConvolutionVertKernel : public ICLSimple2DKernel public: /** Initialise the kernel's input, output and border mode. * - * @param[in] input Source tensor. Data types supported: S16. + * @param[in] input Source tensor. Data types supported: U16/S16/S32. * @param[out] output Destination tensor, Data types supported: U8, S16. * @param[in] conv Convolution matrix to apply to the input tensor. * @param[in] scale Scale of the convolution matrix. @@ -146,7 +146,7 @@ public: /** Initialise the kernel's input, output and border mode. * * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: S16. + * @param[in] input Source tensor. Data types supported: U16/S16/S32. * @param[out] output Destination tensor, Data types supported: U8, S16. * @param[in] conv Convolution matrix to apply to the input tensor. * @param[in] scale Scale of the convolution matrix. diff --git a/arm_compute/core/CL/kernels/CLCopyKernel.h b/arm_compute/core/CL/kernels/CLCopyKernel.h index 1f0b5a40e4..11a6d54e90 100644 --- a/arm_compute/core/CL/kernels/CLCopyKernel.h +++ b/arm_compute/core/CL/kernels/CLCopyKernel.h @@ -47,7 +47,7 @@ public: CLCopyKernel &operator=(CLCopyKernel &&) = default; /** Initialize the kernel's input, output. * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data types supported: All. * @param[out] output Destination tensor. Data types supported: same as @p input. * @param[in] padding (Optional) Padding to be applied to the input tensor * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. @@ -56,7 +56,7 @@ public: /** Initialize the kernel's input, output. * * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. 
Data types supported: All. * @param[out] output Destination tensor. Data types supported: same as @p input. * @param[in] padding (Optional) Padding to be applied to the input tensor * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. @@ -64,7 +64,7 @@ public: void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding = PaddingList(), Window *output_window = nullptr); /** Static function to check if given info will lead to a valid configuration of @ref CLCopyKernel * - * @param[in] input Source tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor info. Data types supported: All. * @param[in] output Destination tensor info. Data types supported: same as @p input. * @param[in] padding (Optional) Padding to be applied to the input tensor * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h index d933a7eea5..4ca6c0bf4a 100644 --- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h @@ -40,7 +40,7 @@ public: /** Default move assignment operator. */ /** Initialize the function's source, destination, conv and border_size. * - * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED. + * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. @@ -61,7 +61,7 @@ public: /** Initialize the function's source, destination, conv and border_size. * * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED. + * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. @@ -81,7 +81,7 @@ public: const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override; /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel * - * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED. + * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] weights Weights tensor info. A 3D tensor with dimensions [IFM, 3, 3]. * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. 
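The "Data types supported" lists that this patch aligns are enforced at configure time, and every function documented here also exposes a static validate() that checks the same contract up front. The following sketch (illustrative only, not part of the commit) shows the usual validate-then-configure flow using CLCopy, whose documentation is updated to "Data types supported: All" further down in this patch; the shape and the U8 type are arbitrary choices for the example.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLCopy.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init(); // Create a default CL context and command queue

    // Arbitrary example shape and type; per the docs above, CLCopy accepts all data types.
    const TensorInfo info(TensorShape(16U, 16U), 1, DataType::U8);
    CLTensor src{}, dst{};
    src.allocator()->init(info);
    dst.allocator()->init(info);

    // The static validate() checks the documented "Data types supported"
    // contract without allocating any device memory.
    const Status status = CLCopy::validate(src.info(), dst.info());
    if(!bool(status))
    {
        return 1; // This configuration would be rejected by configure()
    }

    CLCopy copy{};
    copy.configure(&src, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    copy.run();                // Enqueue the kernel
    CLScheduler::get().sync(); // Wait for the command queue to finish
    return 0;
}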
diff --git a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h b/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h index 699ff5d7a9..1995aed7b6 100644 --- a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h +++ b/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h @@ -113,7 +113,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel * * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32. + * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. * @param[in] output Output tensor. Data types supported: Same as @p input1. * @param[in] policy Policy to use to handle overflow. @@ -124,7 +124,7 @@ public: * * @param[in] compile_context The compile context to be used. * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32. + * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. * @param[in] output Output tensor. Data types supported: Same as @p input1. * @param[in] policy Policy to use to handle overflow. @@ -136,7 +136,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel * * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32. + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. * @param[in] output Output tensor info. Data types supported: Same as @p input1. * @param[in] policy Policy to use to handle overflow. @@ -170,7 +170,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel * * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32. + * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. * @param[in] output Output tensor. Data types supported: Same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. @@ -180,7 +180,7 @@ public: * * @param[in] compile_context The compile context to be used. * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32. + * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. * @param[in] output Output tensor. Data types supported: Same as @p input1. 
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. @@ -191,7 +191,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel * * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32. + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. * @param[in] output Output tensor info. Data types supported: Same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h index bb7461ca44..6066e2a815 100644 --- a/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h @@ -48,7 +48,7 @@ public: /** Initialise the kernel's input and output. * - * @param[in] input Input tensor. Data type supported: S8 + * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8. * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32 * @param[in] info Kernel metadata: * - k Number of matrix columns/rows depending on the type of reduction. @@ -60,7 +60,7 @@ public: /** Initialise the kernel's input and output. * * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data type supported: S8 + * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8. * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32 * @param[in] info Kernel metadata: * - k Number of matrix columns/rows depending on the type of reduction. @@ -85,7 +85,7 @@ class CLGEMMLowpMatrixAReductionKernel : public ICLGEMMLowpReductionKernel public: /** Initialise the kernel's input and output. * - * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8. * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 * @param[in] info Kernel metadata: * - k Number of matrix columns/rows depending on the type of reduction. @@ -97,7 +97,7 @@ public: /** Initialise the kernel's input and output. * * @param[in] compile_context The compile context to be used. - * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8. * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 * @param[in] info Kernel metadata: * - k Number of matrix columns/rows depending on the type of reduction. @@ -108,7 +108,7 @@ public: void configure(const CLCompileContext &compile_context, const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override; /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixAReductionKernel * - * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] mtx_a Input tensor. 
Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8. * @param[in] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 * @param[in] info Kernel metadata: * - k Number of matrix columns/rows depending on the type of reduction. @@ -134,7 +134,7 @@ class CLGEMMLowpMatrixBReductionKernel : public ICLGEMMLowpReductionKernel public: /** Initialise the kernel's input and output. * - * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] mtx_b Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL. * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 * @param[in] info Kernel metadata: * - k Number of matrix columns/rows depending on the type of reduction. @@ -146,7 +146,7 @@ public: /** Initialise the kernel's input and output. * * @param[in] compile_context The compile context to be used. - * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] mtx_b Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL. * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 * @param[in] info Kernel metadata: * - k Number of matrix columns/rows depending on the type of reduction. @@ -157,7 +157,7 @@ public: void configure(const CLCompileContext &compile_context, const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override; /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixBReductionKernel * - * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] mtx_b Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL. * @param[in] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 * @param[in] info Kernel metadata: * - k Number of matrix columns/rows depending on the type of reduction. diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h deleted file mode 100644 index 3b4a0be0e8..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H -#define ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -/** Interface to add a bias to each row of the input tensor - * - */ -class CLGEMMMatrixAccumulateBiasesKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLGEMMMatrixAccumulateBiasesKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixAccumulateBiasesKernel(const CLGEMMMatrixAccumulateBiasesKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixAccumulateBiasesKernel &operator=(const CLGEMMMatrixAccumulateBiasesKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMMatrixAccumulateBiasesKernel(CLGEMMMatrixAccumulateBiasesKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMMatrixAccumulateBiasesKernel &operator=(CLGEMMMatrixAccumulateBiasesKernel &&) = default; - /** Set the accumulate buffer and the biases of the kernel. - * - * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32 - * @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p input - */ - void configure(ICLTensor *accum, const ICLTensor *biases); - /** Set the accumulate buffer and the biases of the kernel. - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32 - * @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p input - */ - void configure(const CLCompileContext &compile_context, ICLTensor *accum, const ICLTensor *biases); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixAccumulateBiasesKernel - * - * @param[in] accum The accumulate tensor to convert. Data types supported: F16/F32 - * @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p input - * @param[in] gpu_target GPU target - * - * @return a status - */ - static Status validate(const ITensorInfo *accum, const ITensorInfo *biases, GPUTarget gpu_target); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_accum; - const ICLTensor *_biases; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h index 5f1d82ded1..a741b1745a 100644 --- a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h +++ b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h @@ -51,9 +51,9 @@ public: * * @note At least one of output1 or output2 must be set. * - * @param[in] gx The input gradient X tensor. Data types supported: S16. - * @param[in] gy The input gradient Y tensor. Data types supported: S16. - * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16. + * @param[in] gx The input gradient X tensor. 
Data types supported: S16/S32. + * @param[in] gy The input gradient Y tensor. Data types supported: S16/S32. + * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16/S32. * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8. * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM. * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. @@ -65,9 +65,9 @@ public: * @note At least one of output1 or output2 must be set. * * @param[in] compile_context The compile context to be used. - * @param[in] gx The input gradient X tensor. Data types supported: S16. - * @param[in] gy The input gradient Y tensor. Data types supported: S16. - * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16. + * @param[in] gx The input gradient X tensor. Data types supported: S16/S32. + * @param[in] gy The input gradient Y tensor. Data types supported: S16/S32. + * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16/S32. * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8. * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM. * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. diff --git a/arm_compute/core/CL/kernels/CLPadLayerKernel.h b/arm_compute/core/CL/kernels/CLPadLayerKernel.h index 3b78bb9834..5bf5841803 100644 --- a/arm_compute/core/CL/kernels/CLPadLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLPadLayerKernel.h @@ -25,7 +25,6 @@ #define ARM_COMPUTE_CLPADLAYERKERNEL_H #include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" namespace arm_compute { @@ -49,7 +48,7 @@ public: ~CLPadLayerKernel() = default; /** Set the input and output tensor. * - * @param[in] input Source tensor. Data types supported: U8, S8, QASYMM8, QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32. + * @param[in] input Source tensor. Data types supported: All. * @param[out] output Output tensor. Data type supported: same as @p input * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] * specifies the front and the end padding in the i-th dimension. @@ -73,7 +72,7 @@ public: PaddingMode mode = PaddingMode::CONSTANT); /** Static function to check if given info will lead to a valid configuration of @ref CLPadLayerKernel * - * @param[in] input Source tensor info. Data types supported: U8, S8, QASYMM8, QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32. + * @param[in] input Source tensor info. Data types supported: All. * @param[in] output Output tensor info. Data type supported: same as @p input * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] * specifies the front and the end padding in the i-th dimension. diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h index bb64060a12..bb98eb83bf 100644 --- a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h +++ b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h @@ -47,18 +47,24 @@ public: /** Allow instances of this class to be moved */ CLPixelWiseMultiplicationKernel &operator=(CLPixelWiseMultiplicationKernel &&) = default; /** Initialise the kernel's input, output and border mode. 
+ * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,U8) -> S16 + * - (S16,S16) -> S16 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * - (QASYMM8,QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (QSYMM16,QSYMM16) -> QSYMM16 + * - (QSYMM16,QSYMM16) -> S32 * * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[in] input2 An input tensor. Data types supported: same as @p input1. - * @param[out] output The output tensor, Data types supported: - * - U8, only if both input are U8 - * - QASYMM8, only if both inputs are QASYMM8 - * - QASYMM8_SIGNED, only if both inputs are QASYMM8_SIGNED - * - S16 - * - QSYMM16, only if both inputs are QSYMM16 - * - S32, only if both inputs are QSYMM16 - * - F16 - * - F32 + * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. + * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. * @param[in] scale Scale to apply after multiplication. * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate @@ -68,11 +74,25 @@ public: void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Initialise the kernel's input, output and border mode. + * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,U8) -> S16 + * - (S16,S16) -> S16 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * - (QASYMM8,QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (QSYMM16,QSYMM16) -> QSYMM16 + * - (QSYMM16,QSYMM16) -> S32 * * @param[in] compile_context The compile context to be used. * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[in] input2 An input tensor. Data types supported: same as @p input1. - * @param[out] output The output tensor, Data types supported: same as @p input1. Note: U8 requires both inputs to be U8. + * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. + * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. * @param[in] scale Scale to apply after multiplication. * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. * @param[in] overflow_policy Overflow policy. 
Supported overflow policies: Wrap, Saturate @@ -82,18 +102,24 @@ public: void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplicationKernel + * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,U8) -> S16 + * - (S16,S16) -> S16 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * - (QASYMM8,QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (QSYMM16,QSYMM16) -> QSYMM16 + * - (QSYMM16,QSYMM16) -> S32 * * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[in] input2 An input tensor info. Data types supported: same as @p input1. - * @param[in] output The output tensor info, Data types supported: - * - U8, only if both input are U8 - * - QASYMM8, only if both inputs are QASYMM8 - * - QASYMM8_SIGNED, only if both inputs are QASYMM8_SIGNED - * - S16 - * - QSYMM16, only if both inputs are QSYMM16 - * - S32, only if both inputs are QSYMM16 - * - F16 - * - F32 + * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. + * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. * @param[in] scale Scale to apply after multiplication. * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate diff --git a/arm_compute/core/CL/kernels/CLReorgLayerKernel.h b/arm_compute/core/CL/kernels/CLReorgLayerKernel.h index 279d891497..e3edc9f724 100644 --- a/arm_compute/core/CL/kernels/CLReorgLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLReorgLayerKernel.h @@ -47,7 +47,7 @@ public: CLReorgLayerKernel &operator=(CLReorgLayerKernel &&) = default; /** Initialize the kernel's input, output. * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data types supported: All. * @param[out] output Destination tensor with tensor shape: * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has * the same number of input elements. Data types supported: same as @p input. @@ -58,7 +58,7 @@ public: /** Initialize the kernel's input, output. * * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data types supported: All. * @param[out] output Destination tensor with tensor shape: * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has * the same number of input elements. Data types supported: same as @p input. 
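Each (Input1,Input2) -> Output row in the list above is a combination that validate() accepts and configure() will build. The sketch below (illustrative only, not part of the commit) exercises the (U8,U8) -> S16 row through the runtime wrapper CLPixelWiseMultiplication, whose header is touched by this patch; its configure/validate signatures are assumed to mirror the kernel's shown above, and the shapes are arbitrary example values.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    // One row of the table above: (U8,U8) -> S16.
    CLTensor in1{}, in2{}, out{};
    in1.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::U8));
    in2.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::U8));
    out.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::S16));

    // The scale must be 1/255 or 1/2^n with n in [0,15]; 1.f (n = 0) is valid.
    const float scale = 1.f;
    const Status status = CLPixelWiseMultiplication::validate(in1.info(), in2.info(), out.info(), scale,
                                                              ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
    if(bool(status))
    {
        CLPixelWiseMultiplication mul{};
        mul.configure(&in1, &in2, &out, scale, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
        in1.allocator()->allocate();
        in2.allocator()->allocate();
        out.allocator()->allocate();
        mul.run();
        CLScheduler::get().sync();
    }
    return 0;
}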
diff --git a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h index 5297af2c1c..f8c1019d53 100644 --- a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h @@ -27,93 +27,10 @@ #include "arm_compute/core/CL/ICLSimple3DKernel.h" #include "arm_compute/core/KernelDescriptors.h" -#include - namespace arm_compute { class ICLTensor; -/** Interface for the identifying the max value of 1D Logits */ -class CLLogits1DMaxKernel : public ICLSimple3DKernel -{ -public: - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 - * @param[out] output Destination tensor. Data types supported: same as @p input - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 - * @param[out] output Destination tensor. Data types supported: same as @p input - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxKernel - * - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 - * @param[in] output Destination tensor. Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; - -/** Interface for shifting, exponentiating and summing the logits */ -class CLLogits1DShiftExpSumKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLLogits1DShiftExpSumKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLogits1DShiftExpSumKernel(const CLLogits1DShiftExpSumKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLogits1DShiftExpSumKernel &operator=(const CLLogits1DShiftExpSumKernel &) = delete; - /** Allow instances of this class to be moved */ - CLLogits1DShiftExpSumKernel(CLLogits1DShiftExpSumKernel &&) = default; - /** Allow instances of this class to be moved */ - CLLogits1DShiftExpSumKernel &operator=(CLLogits1DShiftExpSumKernel &&) = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 - * @param[in] max Max values tensor. Data types supported: same as @p input - * @param[out] output Destination tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input - * @param[out] sum Sum of 1D logits tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input - * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.0 - */ - void configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 - * @param[in] max Max values tensor. Data types supported: same as @p input - * @param[out] output Destination tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input - * @param[out] sum Sum of 1D logits tensor. 
Data types supported: S32 for QASYMM8 @p input, or same as @p input - * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.0 - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f); - /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DShiftExpSumKernel - * - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 - * @param[in] max Max values tensor. Data types supported: same as @p input - * @param[in] output Destination tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input - * @param[in] sum Sum of 1D logits tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_max; - ICLTensor *_output; - ICLTensor *_sum; -}; - /** Interface for max, shifting, exponentiating and summing the logits */ class CLLogits1DMaxShiftExpSumKernel : public ICLKernel { @@ -134,7 +51,7 @@ public: CLLogits1DMaxShiftExpSumKernel &operator=(CLLogits1DMaxShiftExpSumKernel &&) = default; /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: F16/F32 + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 * @param[in,out] max Max values tensor. Data types supported: same as @p input * @param[out] output Destination tensor. Data types supported: same as @p input * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input @@ -144,7 +61,7 @@ public: /** Set the input and output tensors. * * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: F16/F32 + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 * @param[in,out] max Max values tensor. Data types supported: same as @p input * @param[out] output Destination tensor. Data types supported: same as @p input * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input @@ -153,7 +70,7 @@ public: void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxShiftExpSumKernel * - * @param[in] input Source tensor. Data types supported: F16/F32 + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 * @param[in] max Max values tensor. Data types supported: same as @p input * @param[in] output Destination tensor. Data types supported: same as @p input * @param[in] sum Sum of 1D logits tensor. Data types supported: same as @p input @@ -205,7 +122,7 @@ public: * * @param[in] input Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported. * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input - * @param[out] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input + * @param[out] output Destination tensor. 
Data types supported: QASYMM8/QASYMM8_SIGNED for S32 @p input, or same as @p input * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. */ void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info); @@ -214,7 +131,7 @@ public: * @param[in] compile_context The compile context to be used. * @param[in] input Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported. * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input - * @param[out] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input + * @param[out] output Destination tensor. Data types supported: QASYMM8/QASYMM8_SIGNED for S32 @p input, or same as @p input * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. */ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info); diff --git a/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h b/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h index d8e1ae113d..b523b97233 100644 --- a/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h @@ -49,7 +49,7 @@ public: /** Initialise the kernel's input and output. * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] input Source tensor. Data types supported: All. * @param[out] output Destination tensor. Data types supported: same as @p input. * @param[in] info Contains stride information described in @ref Size2D. * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. @@ -58,7 +58,7 @@ public: /** Initialise the kernel's input and output. * * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] input Source tensor. Data types supported: All. * @param[out] output Destination tensor. Data types supported: same as @p input. * @param[in] info Contains stride information described in @ref Size2D. * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. @@ -66,7 +66,7 @@ public: void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy); /** Static function to check if given info will lead to a valid configuration of @ref CLUpsampleLayerKernel * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] input Source tensor info. Data types supported: All. * @param[in] output Destination tensor info. Data types supported: same as @p input. * @param[in] info Contains stride information described in @ref Size2D. * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. diff --git a/arm_compute/runtime/CL/functions/CLComparison.h b/arm_compute/runtime/CL/functions/CLComparison.h index ac6a41ef2e..c6d61e45f2 100644 --- a/arm_compute/runtime/CL/functions/CLComparison.h +++ b/arm_compute/runtime/CL/functions/CLComparison.h @@ -79,7 +79,7 @@ public: public: /** Initialise the kernel's inputs and outputs. * - * @param[in] input1 Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. 
+ * @param[in] input1 Source tensor. Data types supported: All. * The input1 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. * @param[in] input2 Source tensor. Data types supported: Same as @p input1. * The input2 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. @@ -92,7 +92,7 @@ public: /** Initialise the kernel's inputs and outputs. * * @param[in] compile_context The compile context to be used. - * @param[in] input1 Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input1 Source tensor. Data types supported: All. * The input1 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. * @param[in] input2 Source tensor. Data types supported: Same as @p input1. * The input2 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. @@ -101,7 +101,7 @@ public: void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLComparison * - * @param[in] input1 Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input1 Source tensor. Data types supported: All. * @param[in] input2 Source tensor. Data types supported: Same as @p input1. * @param[in] output Destination tensor. Data types supported: U8. * diff --git a/arm_compute/runtime/CL/functions/CLCopy.h b/arm_compute/runtime/CL/functions/CLCopy.h index ee5f31bc4c..c20d75eea8 100644 --- a/arm_compute/runtime/CL/functions/CLCopy.h +++ b/arm_compute/runtime/CL/functions/CLCopy.h @@ -38,7 +38,7 @@ class CLCopy : public ICLSimpleFunction public: /** Initialise the function's source and destination. * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data types supported: All. * @param[out] output Output tensor. Data types supported: Same as @p input. * */ @@ -46,14 +46,14 @@ public: /** Initialise the function's source and destination. * * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data types supported: All. * @param[out] output Output tensor. Data types supported: Same as @p input. * */ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLCopy * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data types supported: All. * @param[in] output Output tensor. Data types supported: Same as @p input. * * @return a status diff --git a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h index 8cf1ca83c5..9cd3c150cc 100644 --- a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h +++ b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h @@ -33,7 +33,7 @@ class ICLTensor; /** Basic function to run @ref CLSaturatedArithmeticOperationKernel for addition * - * @note The tensor data type for the inputs must be U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32. 
+ * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. * @note The function performs an arithmetic addition between two tensors. */ class CLArithmeticAddition : public ICLSimpleFunction @@ -41,32 +41,74 @@ class CLArithmeticAddition : public ICLSimpleFunction public: /** Initialise the kernel's inputs, output and conversion policy. * - * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32. + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (S16,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,S16) -> S16 + * - (S32,S32) -> S32 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * - (QASYMM8,QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (QSYMM16,QSYMM16) -> QSYMM16 + * + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QSYMM16 (only if @p input1 is QSYMM16), S16/F16/F32. + * @param[in, out] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), QSYMM16 (only if both inputs is QSYMM16), S16/F16/F32. + * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. * @param[in] policy Policy to use to handle overflow. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. */ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Initialise the kernel's inputs, output and conversion policy. + * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (S16,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,S16) -> S16 + * - (S32,S32) -> S32 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * - (QASYMM8,QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (QSYMM16,QSYMM16) -> QSYMM16 * * @param[in] compile_context The compile context to be used. - * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32. + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QSYMM16 (only if @p input1 is QSYMM16), S16/F16/F32. + * @param[in, out] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output Output tensor. 
Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), QSYMM16 (only if both inputs is QSYMM16), S16/F16/F32. + * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. * @param[in] policy Policy to use to handle overflow. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. */ void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel for addition * - * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QSYMM16 (only if @p input1 is QSYMM16), S16/F16/F32. - * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), QSYMM16 (only if both inputs is QSYMM16), S16/F16/F32. + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (S16,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,S16) -> S16 + * - (S32,S32) -> S32 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * - (QASYMM8,QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (QSYMM16,QSYMM16) -> QSYMM16 + * + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. + * @param[in] input2 Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. + * @param[in] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. * @param[in] policy Policy to use to handle overflow. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. * @@ -77,7 +119,7 @@ public: /** Basic function to run @ref CLSaturatedArithmeticOperationKernel for subtraction * - * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/U32/F16/F32. + * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/S32/F16/F32. * @note The function performs an arithmetic subtraction between two tensors. */ class CLArithmeticSubtraction : public ICLSimpleFunction @@ -85,32 +127,74 @@ class CLArithmeticSubtraction : public ICLSimpleFunction public: /** Initialise the kernel's inputs, output and conversion policy. * - * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32. + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (S16,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,S16) -> S16 + * - (S32,S32) -> S32 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * - (QASYMM8,QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (QSYMM16,QSYMM16) -> QSYMM16 + * + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32. 
+ * @param[in, out] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32. + * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. * @param[in] policy Policy to use to handle overflow. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. */ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Initialise the kernel's inputs, output and conversion policy. + * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (S16,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,S16) -> S16 + * - (S32,S32) -> S32 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * - (QASYMM8,QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (QSYMM16,QSYMM16) -> QSYMM16 * * @param[in] compile_context The compile context to be used. - * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32. + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32. + * @param[in, out] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32. + * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. * @param[in] policy Policy to use to handle overflow. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. */ void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel for subtraction * - * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32. - * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 ( only if both inputs are QASYMM8), S16/F16/F32. 
+ * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (S16,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,S16) -> S16 + * - (S32,S32) -> S32 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * - (QASYMM8,QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (QSYMM16,QSYMM16) -> QSYMM16 + * + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. + * @param[in] input2 Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. + * @param[in] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. * @param[in] policy Policy to use to handle overflow. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. * @@ -170,30 +254,30 @@ class CLElementwiseMax : public ICLSimpleFunction public: /** Initialise the kernel's inputs, output and conversion policy. * - * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32. + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32. + * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32. + * @param[out] output Output tensor. Data types supported: same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. */ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Initialise the kernel's inputs, output and conversion policy. * * @param[in] compile_context The compile context to be used. - * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32. + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32. + * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32. + * @param[out] output Output tensor. Data types supported: same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
*/ void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for max * - * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32. - * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 ( only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32. + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32. + * @param[in] input2 Second tensor input info. Data types supported: same as @p input1. + * @param[in] output Output tensor info. Data types supported: same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. * * @return a status @@ -211,30 +295,30 @@ class CLElementwiseMin : public ICLSimpleFunction public: /** Initialise the kernel's inputs, output and conversion policy. * - * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32. + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32. + * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32. + * @param[out] output Output tensor. Data types supported: same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. */ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Initialise the kernel's inputs, output and conversion policy. * * @param[in] compile_context The compile context to be used. - * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32. + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32. + * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output Output tensor. 
Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32. + * @param[out] output Output tensor. Data types supported: same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. */ void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for min * - * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32. - * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 ( only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32. + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32. + * @param[in] input2 Second tensor input info. Data types supported: same as @p input1. + * @param[in] output Output tensor info. Data types supported: same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. * * @return a status @@ -252,30 +336,30 @@ class CLElementwiseSquaredDiff : public ICLSimpleFunction public: /** Initialise the kernel's inputs, output and conversion policy. * - * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32. + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32. + * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32. + * @param[out] output Output tensor. Data types supported: same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. */ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Initialise the kernel's inputs, output and conversion policy. * * @param[in] compile_context The compile context to be used. - * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32. + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32. 
+ * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32. + * @param[out] output Output tensor. Data types supported: same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. */ void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for squared difference * - * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32. - * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 ( only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32. + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. + * @param[in] input2 Second tensor input info. Data types supported: same as @p input1. + * @param[in] output Output tensor info. Data types supported: same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. * * @return a status diff --git a/arm_compute/runtime/CL/functions/CLFill.h b/arm_compute/runtime/CL/functions/CLFill.h index 99cae077fe..b79b234158 100644 --- a/arm_compute/runtime/CL/functions/CLFill.h +++ b/arm_compute/runtime/CL/functions/CLFill.h @@ -38,14 +38,14 @@ class CLFill : public ICLSimpleFunction public: /** Initialize the function * - * @param[in,out] tensor Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in,out] tensor Source tensor. Data types supported: All. * @param[in] constant_value Constant value to use to fill tensor. */ void configure(ICLTensor *tensor, PixelValue constant_value); /** Initialize the function * * @param[in] compile_context The compile context to be used. - * @param[in,out] tensor Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in,out] tensor Source tensor. Data types supported: All. * @param[in] constant_value Constant value to use to fill tensor. */ void configure(const CLCompileContext &compile_context, ICLTensor *tensor, PixelValue constant_value); diff --git a/arm_compute/runtime/CL/functions/CLFillBorder.h b/arm_compute/runtime/CL/functions/CLFillBorder.h index e552c353db..18bc20e654 100644 --- a/arm_compute/runtime/CL/functions/CLFillBorder.h +++ b/arm_compute/runtime/CL/functions/CLFillBorder.h @@ -38,7 +38,7 @@ class CLFillBorder : public ICLSimpleFunction public: /** Initialize the function * - * @param[in,out] tensor Source tensor. Data types supported: U8/S16 + * @param[in,out] tensor Source tensor. Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32. * @param[in] border_width The border width * @param[in] border_mode Strategy to use for borders. 
* @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. @@ -47,7 +47,7 @@ public: /** Initialize the function * * @param[in] compile_context The compile context to be used. - * @param[in,out] tensor Source tensor. Data types supported: U8/S16 + * @param[in,out] tensor Source tensor. Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32. * @param[in] border_width The border width * @param[in] border_mode Strategy to use for borders. * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h index 018d88d366..8e4d3906d1 100644 --- a/arm_compute/runtime/CL/functions/CLGEMM.h +++ b/arm_compute/runtime/CL/functions/CLGEMM.h @@ -74,7 +74,7 @@ public: /** Configures the @ref CLGEMMReshapeRHSMatrixKernel kernel * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[in] info RHS matrix information to be used for reshaping. */ void configure(const ICLTensor *input, GEMMRHSMatrixInfo info) @@ -85,7 +85,7 @@ public: /** Configures the @ref CLGEMMReshapeRHSMatrixKernel kernel * * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[in] info RHS matrix information to be used for reshaping. */ void configure(const CLCompileContext &compile_context, const ICLTensor *input, GEMMRHSMatrixInfo info) diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h index be9283b6ec..277b27f690 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h @@ -98,8 +98,8 @@ class CLConvolutionLayerReshapeWeightsTransform : public ITransformWeights public: /** Configures the @ref CLConvolutionLayerReshapeWeights function * - * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] biases Biases tensor. Data type supported: Same as @p input. + * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32. + * @param[in] biases Biases tensor. Data type supported: same as @p input, S32 if @p input is quantized. * @param[in] num_groups Number of groups when performing a grouped convolution. */ void configure(const ICLTensor *input, const ICLTensor *biases, unsigned int num_groups) @@ -109,8 +109,8 @@ public: /** Configures the @ref CLConvolutionLayerReshapeWeights function * * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] biases Biases tensor. Data type supported: Same as @p input. + * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32. + * @param[in] biases Biases tensor. Data type supported: same as @p input, S32 if @p input is quantized. * @param[in] num_groups Number of groups when performing a grouped convolution. */ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *biases, unsigned int num_groups) @@ -183,11 +183,11 @@ public: * * @param[in] input Source tensor. 
3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: QASYMM8/F16/F32. + * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8. + * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8 or QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8_SIGNED. * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type. + * Data type supported: Should match @p input data type, except for input of quantized type where biases should be of S32 type. * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. * Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. @@ -204,11 +204,11 @@ public: * @param[in] compile_context The compile context to be used. * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: QASYMM8/F16/F32. + * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8. + * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8 or QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8_SIGNED. * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type. + * Data type supported: Should match @p input data type, except for input of quantized type where biases should be of S32 type. * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. * Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. @@ -225,11 +225,11 @@ public: * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: QASYMM8/F16/F32. + * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8. + * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8 or QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8_SIGNED. * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. 
- * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type. + * Data type supported: Should match @p input data type, except for input of quantized type where biases should be of S32 type. * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. * Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. @@ -252,12 +252,12 @@ private: /** Configures the appropriate matrix multiply routine * * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: QASYMM8/F16/F32. - * @param[in] weights Weights tensor. Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8. + * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor. Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8 or + * QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8_SIGNED. * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type. - * @param[in, out] output Output tensor. Data types supported: Same as @p input, - * except for input of QASYMM8 type where output should be of S32 type. + * Data type supported: Should match @p input data type, except for input of quantized type where biases should be of S32 type. + * @param[in, out] output Output tensor. Data types supported: same as @p input. * @param[in] gemmlowp_output_stage GEMMLowp output stage info * @param[in] gemm_3d_depth Depth of GEMM 3D * @param[in] act_info Activation to apply after the matrix multiplication @@ -267,12 +267,12 @@ private: int gemm_3d_depth, const ActivationLayerInfo &act_info); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer matrix multiply routines * - * @param[in] input Input tensor info. Data types supported: QASYMM8/F16/F32. - * @param[in] weights Weights tensor info. Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8. + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor info. Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8 or + * QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8_SIGNED. * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type. - * @param[in] output Output tensor info. Data types supported: Same as @p input, - * except for input of QASYMM8 type where output should be of S32 type. + * Data type supported: Should match @p input data type, except for input of quantized type where biases should be of S32 type. + * @param[in] output Output tensor info. Data types supported: same as @p input. * @param[in] gemmlowp_output_stage GEMMLowp output stage info * @param[in] gemm_3d_depth Depth of GEMM 3D * @param[in] skip_im2col Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout. 
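
For reference, a minimal usage sketch of the quantized path documented above. This snippet is illustrative only, not part of the patch; all shapes and quantization parameters are invented for the example. The one detail that differs from the float path is visible here: input and weights are QASYMM8 while the bias tensor must be S32, exactly as the @param documentation states.

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init(); // create the CL context and queue

        // Invented example shapes: 64x64 input with 16 channels, 3x3 kernel, 32 output channels.
        CLTensor input, weights, biases, output;
        input.allocator()->init(TensorInfo(TensorShape(64U, 64U, 16U), 1, DataType::QASYMM8, QuantizationInfo(0.02f, 128)));
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U, 32U), 1, DataType::QASYMM8, QuantizationInfo(0.005f, 127)));
        biases.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::S32)); // quantized input => S32 biases
        output.allocator()->init(TensorInfo(TensorShape(64U, 64U, 32U), 1, DataType::QASYMM8, QuantizationInfo(0.05f, 128)));

        CLGEMMConvolutionLayer conv;
        conv.configure(&input, &weights, &biases, &output, PadStrideInfo(1, 1, 1, 1)); // stride 1, pad 1 => same-size output

        for(auto *t : { &input, &weights, &biases, &output })
        {
            t->allocator()->allocate();
        }

        conv.run();                // enqueue the GEMM-based convolution
        CLScheduler::get().sync(); // wait for the OpenCL queue to drain
        return 0;
    }
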
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
index 0e822bc5a2..57b1e30df5 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
@@ -59,8 +59,8 @@ public:
 * @note GEMMLowp: low precision GEMM kernel. [A * B + C]
 * This kernel performs the following computations:
 *
- * -# Convert a values from QASYMM8 to int32 and add a_offset to each of them.
- * -# Convert b values from QASYMM8 to int32 and add b_offset to each of them.
+ * -# Convert a values from 8-bit quantized to int32 and add a_offset to each of them.
+ * -# Convert b values from 8-bit quantized to int32 and add b_offset to each of them.
 * -# Compute the matrix product of the resulting a * b in int32.
 * -# Quantize to uint8 if gemm_info.gemmlowp_output_stage != NONE
 *
@@ -77,8 +77,8 @@ public:
 * @note GEMMLowp: low precision GEMM kernel. [A * B + C]
 * This kernel performs the following computations:
 *
- * -# Convert a values from QASYMM8 to int32 and add a_offset to each of them.
- * -# Convert b values from QASYMM8 to int32 and add b_offset to each of them.
+ * -# Convert a values from 8-bit quantized to int32 and add a_offset to each of them.
+ * -# Convert b values from 8-bit quantized to int32 and add b_offset to each of them.
 * -# Compute the matrix product of the resulting a * b in int32.
 * -# Quantize to uint8 if gemm_info.gemmlowp_output_stage != NONE
 *
diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
index a647281e29..b87daba1f8 100644
--- a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
+++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
@@ -37,20 +37,26 @@ class CLPixelWiseMultiplication : public ICLSimpleFunction
{
public:
 /** Initialise the kernel's inputs, output and conversion policy.
+ *
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,U8) -> S16
+ * - (S16,S16) -> S16
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ * - (QASYMM8,QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (QSYMM16,QSYMM16) -> QSYMM16
+ * - (QSYMM16,QSYMM16) -> S32
 *
 * @param[in, out] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
 * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 An input tensor. Data types supported: same as @p input1.
+ * @param[in, out] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
 * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output The output tensor. Data types supported:
- * - U8, only if both input are U8
- * - QASYMM8, only if both inputs are QASYMM8
- * - QASYMM8_SIGNED, only if both inputs are QASYMM8_SIGNED
- * - S16
- * - QSYMM16, only if both inputs are QSYMM16
- * - S32, only if both inputs are QSYMM16
- * - F16
- * - F32
+ * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
 * @param[in] scale Scale to apply after multiplication.
 * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
 * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
@@ -60,13 +66,27 @@ public:
 void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
 /** Initialise the kernel's inputs, output and conversion policy.
+ *
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,U8) -> S16
+ * - (S16,S16) -> S16
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ * - (QASYMM8,QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (QSYMM16,QSYMM16) -> QSYMM16
+ * - (QSYMM16,QSYMM16) -> S32
 *
 * @param[in] compile_context The compile context to be used.
 * @param[in, out] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
 * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 An input tensor. Data types supported: same as @p input1.
+ * @param[in, out] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
 * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output The output tensor, Data types supported: same as @p input1. Note: U8 requires both inputs to be U8.
+ * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
 * @param[in] scale Scale to apply after multiplication.
 * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
 * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
@@ -76,18 +96,25 @@ public:
 void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
 /** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplication
+ *
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,U8) -> S16
+ * - (S16,S16) -> S16
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ * - (QASYMM8,QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (QSYMM16,QSYMM16) -> QSYMM16
+ * - (QSYMM16,QSYMM16) -> S32
 *
 * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * @param[in] input2 An input tensor info. Data types supported: same as @p input1.
- * @param[in] output The output tensor info. Data types supported:
- * - U8, only if both input are U8
- * - QASYMM8, only if both inputs are QASYMM8
- * - QASYMM8_SIGNED, only if both inputs are QASYMM8_SIGNED
- * - S16
- * - QSYMM16, only if both inputs are QSYMM16
- * - S32, only if both inputs are QSYMM16
- * - F16
- * - F32
+ * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+ * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
 * @param[in] scale Scale to apply after multiplication.
* Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h index 013ba647b1..5d050d71d6 100644 --- a/arm_compute/runtime/CL/functions/CLReductionOperation.h +++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h @@ -26,19 +26,17 @@ #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h" -#include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLReshapeLayer.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" -#include #include -#include namespace arm_compute { +// Forward declarations class ICLTensor; /** Perform reduction operation. @@ -54,7 +52,7 @@ public: /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32/S32. * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3 * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX @@ -64,7 +62,7 @@ public: /** Set the input and output tensors. * * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32/S32. * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3 * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX @@ -74,7 +72,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperation. * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32/S32. * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input. * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3 * @param[in] op Reduction operation to perform. 
Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX
@@ -95,7 +93,6 @@ private:
 std::vector<CLReductionOperationKernel> _reduction_kernels_vector;
 std::vector<CLFillBorderKernel> _border_handlers_vector;
 CLReshapeLayer _reshape;
- ReductionOperation _op;
 unsigned int _num_of_stages;
 unsigned int _reduction_axis;
 bool _is_serial;
diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
index 93ad24e893..ec57bacf07 100644
--- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
@@ -47,8 +47,6 @@ class ICLTensor;
 * @f[ out = (x - max(x) * beta) - log(\sum{e^{x - max(x) * beta}}) @f]
 *
 * This function runs the following kernels:
- * -# @ref CLLogits1DMaxKernel
- * -# @ref CLLogits1DShiftExpSumKernel
 * -# @ref CLLogits1DNormKernel
 * And if the reduce_end_axis is not 0, the function will use one of the following kernels to reshape the input and
 * perform softmax on the reshaped input:
@@ -63,36 +61,36 @@ public:
 CLSoftmaxLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
 /** Set the input and output tensors.
 *
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 for Softmax and F16/F32 for Log Softmax
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
 * @param[out] output Destination tensor. Data types supported: same as @p input
 * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f
 * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive) to reduce. Defaults to 0.
- * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
- * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
- * Must be in range [0, input_num_dimensions).
+ * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
+ * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
+ * Must be in range [0, input_num_dimensions).
 */
 void configure(const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t reduce_end_axis = 0);
 /** Set the input and output tensors.
 *
 * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 for Softmax and F16/F32 for Log Softmax
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
 * @param[out] output Destination tensor. Data types supported: same as @p input
 * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f
 * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive) to reduce. Defaults to 0.
- * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
- * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
- * Must be in range [0, input_num_dimensions).
+ * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
+ * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
+ * Must be in range [0, input_num_dimensions).
 */
 void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t reduce_end_axis = 0);
 /** Static function to check if given info will lead to a valid configuration of @ref CLSoftmaxLayer
 *
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 for Softmax and F16/F32 for Log Softmax
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
 * @param[in] output Destination tensor. Data types supported: same as @p input
 * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f
 * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive) to reduce. Defaults to 0.
- * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
- * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
- * Must be in range [0, input_num_dimensions).
+ * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
+ * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
+ * Must be in range [0, input_num_dimensions).
 * @return a status
 */
 static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta = 1.0f, size_t reduce_end_axis = 0);
@@ -111,9 +109,9 @@ private:
 * @param[in] input Original source tensor.
 * @param[in] output Original destination tensor.
 * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive) to reduce. Defaults to 0.
- * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
- * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
- * Must be in range [0, input_num_dimensions).
+ * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
+ * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
+ * Must be in range [0, input_num_dimensions).
 */
 void configure_reshape_input_kernel(const ICLTensor *input, const ICLTensor *output, size_t reduce_end_axis);
 /** Utility method to configure the kernels needed to flatten the input
@@ -127,9 +125,9 @@
 * @param[in] input Original source tensor.
 * @param[in] output Original destination tensor.
 * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive) to reduce. Defaults to 0.
- * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
- * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
- * Must be in range [0, input_num_dimensions).
+ * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
+ * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
+ * Must be in range [0, input_num_dimensions).
 */
 void configure_reshape_input_kernel(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *output, size_t reduce_end_axis);
diff --git a/arm_compute/runtime/CL/functions/CLUnstack.h b/arm_compute/runtime/CL/functions/CLUnstack.h
index bdef44fb0d..5d4d5710ab 100644
--- a/arm_compute/runtime/CL/functions/CLUnstack.h
+++ b/arm_compute/runtime/CL/functions/CLUnstack.h
@@ -48,8 +48,8 @@ public:
 CLUnstack();
 /** Set the input, output and unstacking axis.
 *
- * @param[in] input A tensor to be unstacked. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
- * @param[in,out] output_vector A vector of tensors. Data types supported: Same as @p input.
+ * @param[in] input A tensor to be unstacked. Data type supported: All.
+ * @param[in,out] output_vector A vector of tensors. Data types supported: same as @p input.
 * Note: The number of elements of the vector will be used as the number of slices to be taken from the axis.
 * @param[in] axis The axis to unstack along. Valid values are [-R,R) where R is the input's rank. Negative values wrap around.
 *
@@ -58,8 +58,8 @@ public:
 /** Set the input, output and unstacking axis.
 *
 * @param[in] compile_context The compile context to be used.
- * @param[in] input A tensor to be unstacked. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
- * @param[in,out] output_vector A vector of tensors. Data types supported: Same as @p input.
+ * @param[in] input A tensor to be unstacked. Data type supported: All.
+ * @param[in,out] output_vector A vector of tensors. Data types supported: same as @p input.
 * Note: The number of elements of the vector will be used as the number of slices to be taken from the axis.
 * @param[in] axis The axis to unstack along. Valid values are [-R,R) where R is the input's rank. Negative values wrap around.
 *
@@ -67,8 +67,8 @@ public:
 void configure(const CLCompileContext &compile_context, const ICLTensor *input, const std::vector<ICLTensor *> &output_vector, int axis);
 /** Static function to check if given info will lead to a valid configuration of @ref CLUnstack
 *
- * @param[in] input Input tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
- * @param[in] output_vector Vector of output tensors' info. Data types supported: Same as @p input.
+ * @param[in] input Input tensor info. Data type supported: All.
+ * @param[in] output_vector Vector of output tensors' info. Data types supported: same as @p input.
 * @param[in] axis The axis to unstack along. Valid values are [-R,R) where R is the input's rank. Negative values wrap around.
* * @return a status diff --git a/arm_compute/runtime/CL/functions/CLUpsampleLayer.h b/arm_compute/runtime/CL/functions/CLUpsampleLayer.h index dcaa13471f..07b4c8aecb 100644 --- a/arm_compute/runtime/CL/functions/CLUpsampleLayer.h +++ b/arm_compute/runtime/CL/functions/CLUpsampleLayer.h @@ -53,7 +53,7 @@ public: /** Initialize the function's source, destination, interpolation type and border_mode. * - * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] input Source tensor. Data type supported: All. * @param[out] output Destination tensor. Data types supported: same as @p input. * @param[in] info Contains stride information described in @ref Size2D. * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. @@ -63,16 +63,16 @@ public: /** Initialize the function's source, destination, interpolation type and border_mode. * * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] input Source tensor. Data type supported: All. * @param[out] output Destination tensor. Data types supported: same as @p input. * @param[in] info Contains stride information described in @ref Size2D. * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. */ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy); - /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample + /** Static function to check if given info will lead to a valid configuration of @ref CLUpsampleLayerKernel * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] input Source tensor info. Data types supported: All. * @param[in] output Destination tensor info. Data types supported: same as @p input. * @param[in] info Contains stride information described in @ref Size2D. * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. -- cgit v1.2.1
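
For reference, the valid-configuration tables introduced by this patch can be exercised directly; the sketch below drives one of the listed CLArithmeticAddition configurations, (U8,U8) -> S16. It is illustrative only, not part of the patch, and the shapes are invented for the example.

    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        // (U8,U8) -> S16 is one of the listed configurations: the widened output
        // can hold any sum of two U8 values, so no saturation can occur here.
        CLTensor a, b, sum;
        a.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U8));
        b.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U8));
        sum.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::S16));

        // validate() mirrors configure() and rejects an unsupported
        // (input1,input2) -> output combination before anything is allocated.
        ARM_COMPUTE_ERROR_THROW_ON(CLArithmeticAddition::validate(a.info(), b.info(), sum.info(), ConvertPolicy::SATURATE));

        CLArithmeticAddition add;
        add.configure(&a, &b, &sum, ConvertPolicy::SATURATE);

        a.allocator()->allocate();
        b.allocator()->allocate();
        sum.allocator()->allocate();

        add.run();
        CLScheduler::get().sync();
        return 0;
    }
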