From 2b84be544e4a27f7e8e80827e9c85c8f0d58b4ce Mon Sep 17 00:00:00 2001 From: Manuel Bottini Date: Wed, 8 Apr 2020 10:15:51 +0100 Subject: COMPMID-3280: Make all ML primitives for CL use the new interface - Part 2 - CLFunctions have been updated Change-Id: Ie3256a6c775bc12f3126482bd8e8a46da54b267c Signed-off-by: Manuel Bottini Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3053 Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- .../runtime/CL/functions/CLAbsoluteDifference.h | 10 ++- arm_compute/runtime/CL/functions/CLAccumulate.h | 25 +++++- .../runtime/CL/functions/CLActivationLayer.h | 11 +++ .../runtime/CL/functions/CLArgMinMaxLayer.h | 9 +++ .../CL/functions/CLBatchNormalizationLayer.h | 21 ++++- .../runtime/CL/functions/CLBatchToSpaceLayer.h | 17 ++++ arm_compute/runtime/CL/functions/CLBitwiseAnd.h | 10 ++- arm_compute/runtime/CL/functions/CLBitwiseNot.h | 9 ++- arm_compute/runtime/CL/functions/CLBitwiseOr.h | 10 ++- arm_compute/runtime/CL/functions/CLBitwiseXor.h | 10 ++- .../runtime/CL/functions/CLBoundingBoxTransform.h | 14 +++- arm_compute/runtime/CL/functions/CLBox3x3.h | 11 ++- arm_compute/runtime/CL/functions/CLCannyEdge.h | 16 +++- arm_compute/runtime/CL/functions/CLCast.h | 22 ++++- .../runtime/CL/functions/CLChannelCombine.h | 21 ++++- .../runtime/CL/functions/CLChannelExtract.h | 18 ++++- .../runtime/CL/functions/CLChannelShuffleLayer.h | 8 ++ arm_compute/runtime/CL/functions/CLColorConvert.h | 32 +++++++- arm_compute/runtime/CL/functions/CLComparison.h | 24 ++++++ .../runtime/CL/functions/CLComputeAllAnchors.h | 11 ++- .../runtime/CL/functions/CLConcatenateLayer.h | 14 +++- .../CL/functions/CLConvertFullyConnectedWeights.h | 24 +++++- arm_compute/runtime/CL/functions/CLConvolution.h | 38 ++++++++- .../runtime/CL/functions/CLConvolutionLayer.h | 23 ++++++ arm_compute/runtime/CL/functions/CLCopy.h | 10 ++- arm_compute/runtime/CL/functions/CLCropResize.h | 28 ++++++- 
.../runtime/CL/functions/CLDeconvolutionLayer.h | 13 +++ .../CL/functions/CLDeconvolutionLayerUpsample.h | 8 ++ .../runtime/CL/functions/CLDepthConvertLayer.h | 23 +++++- .../runtime/CL/functions/CLDepthToSpaceLayer.h | 8 ++ .../CL/functions/CLDepthwiseConvolutionLayer.h | 65 +++++++++++++++ .../runtime/CL/functions/CLDequantizationLayer.h | 8 ++ arm_compute/runtime/CL/functions/CLDerivative.h | 15 +++- arm_compute/runtime/CL/functions/CLDilate.h | 11 ++- .../CL/functions/CLDirectConvolutionLayer.h | 16 ++++ .../CL/functions/CLDirectDeconvolutionLayer.h | 15 ++++ .../runtime/CL/functions/CLElementWiseUnaryLayer.h | 51 +++++++++++- .../runtime/CL/functions/CLElementwiseOperations.h | 79 ++++++++++++++++++ .../runtime/CL/functions/CLEqualizeHistogram.h | 9 ++- arm_compute/runtime/CL/functions/CLErode.h | 11 ++- arm_compute/runtime/CL/functions/CLFFT1D.h | 10 ++- arm_compute/runtime/CL/functions/CLFFT2D.h | 10 ++- .../runtime/CL/functions/CLFFTConvolutionLayer.h | 19 ++++- arm_compute/runtime/CL/functions/CLFastCorners.h | 15 +++- arm_compute/runtime/CL/functions/CLFill.h | 9 ++- arm_compute/runtime/CL/functions/CLFillBorder.h | 11 ++- arm_compute/runtime/CL/functions/CLFlattenLayer.h | 9 +++ arm_compute/runtime/CL/functions/CLFloor.h | 2 +- .../runtime/CL/functions/CLFullyConnectedLayer.h | 41 +++++++++- .../CL/functions/CLFuseBatchNormalization.h | 21 ++++- arm_compute/runtime/CL/functions/CLGEMM.h | 42 ++++++++-- .../runtime/CL/functions/CLGEMMConvolutionLayer.h | 51 +++++++++++- .../CL/functions/CLGEMMDeconvolutionLayer.h | 11 +++ .../CL/functions/CLGEMMLowpMatrixMultiplyCore.h | 19 +++++ .../runtime/CL/functions/CLGEMMLowpOutputStage.h | 93 ++++++++++++++++++++++ arm_compute/runtime/CL/functions/CLGather.h | 9 +++ arm_compute/runtime/CL/functions/CLGaussian3x3.h | 11 ++- arm_compute/runtime/CL/functions/CLGaussian5x5.h | 11 ++- .../runtime/CL/functions/CLGaussianPyramid.h | 14 +++- .../CL/functions/CLGenerateProposalsLayer.h | 18 +++++ 
arm_compute/runtime/CL/functions/CLHOGDescriptor.h | 13 ++- arm_compute/runtime/CL/functions/CLHOGDetector.h | 18 ++++- arm_compute/runtime/CL/functions/CLHOGGradient.h | 15 +++- .../runtime/CL/functions/CLHOGMultiDetection.h | 27 ++++++- arm_compute/runtime/CL/functions/CLHarrisCorners.h | 19 ++++- arm_compute/runtime/CL/functions/CLHistogram.h | 9 ++- .../CL/functions/CLInstanceNormalizationLayer.h | 12 +++ arm_compute/runtime/CL/functions/CLIntegralImage.h | 9 ++- .../runtime/CL/functions/CLL2NormalizeLayer.h | 11 ++- arm_compute/runtime/CL/functions/CLLSTMLayer.h | 46 +++++++++++ .../runtime/CL/functions/CLLSTMLayerQuantized.h | 29 ++++++- .../runtime/CL/functions/CLLaplacianPyramid.h | 15 +++- .../runtime/CL/functions/CLLaplacianReconstruct.h | 18 ++++- .../runtime/CL/functions/CLLocallyConnectedLayer.h | 15 +++- arm_compute/runtime/CL/functions/CLMagnitude.h | 11 ++- arm_compute/runtime/CL/functions/CLMeanStdDev.h | 10 ++- .../CL/functions/CLMeanStdDevNormalizationLayer.h | 12 ++- arm_compute/runtime/CL/functions/CLMedian3x3.h | 11 ++- .../runtime/CL/functions/CLMinMaxLocation.h | 18 ++++- .../runtime/CL/functions/CLNonLinearFilter.h | 16 +++- .../CL/functions/CLNonMaximaSuppression3x3.h | 14 +++- .../runtime/CL/functions/CLNormalizationLayer.h | 13 ++- .../CL/functions/CLNormalizePlanarYUVLayer.h | 11 +++ arm_compute/runtime/CL/functions/CLOpticalFlow.h | 23 +++++- arm_compute/runtime/CL/functions/CLPReluLayer.h | 10 +++ arm_compute/runtime/CL/functions/CLPadLayer.h | 13 +++ arm_compute/runtime/CL/functions/CLPermute.h | 10 +++ arm_compute/runtime/CL/functions/CLPhase.h | 11 ++- .../CL/functions/CLPixelWiseMultiplication.h | 27 +++++++ arm_compute/runtime/CL/functions/CLPoolingLayer.h | 9 +++ arm_compute/runtime/CL/functions/CLPriorBoxLayer.h | 11 ++- arm_compute/runtime/CL/functions/CLQLSTMLayer.h | 72 ++++++++++++++--- .../runtime/CL/functions/CLQuantizationLayer.h | 9 +++ arm_compute/runtime/CL/functions/CLRNNLayer.h | 15 +++- 
arm_compute/runtime/CL/functions/CLROIAlignLayer.h | 16 ++++ .../runtime/CL/functions/CLROIPoolingLayer.h | 17 +++- arm_compute/runtime/CL/functions/CLRange.h | 11 ++- arm_compute/runtime/CL/functions/CLReduceMean.h | 11 +++ .../runtime/CL/functions/CLReductionOperation.h | 10 +++ arm_compute/runtime/CL/functions/CLRemap.h | 16 +++- arm_compute/runtime/CL/functions/CLReorgLayer.h | 12 +++ arm_compute/runtime/CL/functions/CLReshapeLayer.h | 7 ++ arm_compute/runtime/CL/functions/CLReverse.h | 8 ++ arm_compute/runtime/CL/functions/CLScale.h | 15 ++++ arm_compute/runtime/CL/functions/CLScharr3x3.h | 14 +++- arm_compute/runtime/CL/functions/CLSelect.h | 9 +++ arm_compute/runtime/CL/functions/CLSlice.h | 14 ++++ arm_compute/runtime/CL/functions/CLSobel3x3.h | 14 +++- arm_compute/runtime/CL/functions/CLSobel5x5.h | 14 +++- arm_compute/runtime/CL/functions/CLSobel7x7.h | 14 +++- arm_compute/runtime/CL/functions/CLSoftmaxLayer.h | 28 ++++++- .../runtime/CL/functions/CLSpaceToBatchLayer.h | 21 +++++ .../runtime/CL/functions/CLSpaceToDepthLayer.h | 8 ++ arm_compute/runtime/CL/functions/CLStackLayer.h | 11 +++ arm_compute/runtime/CL/functions/CLStridedSlice.h | 18 +++++ arm_compute/runtime/CL/functions/CLTableLookup.h | 10 ++- arm_compute/runtime/CL/functions/CLThreshold.h | 16 +++- arm_compute/runtime/CL/functions/CLTile.h | 8 ++ arm_compute/runtime/CL/functions/CLTranspose.h | 7 ++ arm_compute/runtime/CL/functions/CLUnstack.h | 12 ++- arm_compute/runtime/CL/functions/CLUpsampleLayer.h | 10 +++ arm_compute/runtime/CL/functions/CLWarpAffine.h | 15 +++- .../runtime/CL/functions/CLWarpPerspective.h | 14 +++- .../CL/functions/CLWinogradConvolutionLayer.h | 22 ++++- .../CL/functions/CLWinogradInputTransform.h | 21 ++++- arm_compute/runtime/CL/functions/CLYOLOLayer.h | 14 +++- 126 files changed, 2118 insertions(+), 105 deletions(-) (limited to 'arm_compute') diff --git a/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h 
b/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h index 28d3acc043..26aded6def 100644 --- a/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h +++ b/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -45,6 +45,14 @@ public: * @param[out] output Output tensor. Data types supported: U8, S16 */ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + /** Initialize the function + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 First input tensor. Data types supported: U8, S16 + * @param[in] input2 Second input tensor. Data types supported: U8, S16 + * @param[out] output Output tensor. Data types supported: U8, S16 + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); }; } #endif /* ARM_COMPUTE_CLABSOLUTEDIFFERENCE_H */ diff --git a/arm_compute/runtime/CL/functions/CLAccumulate.h b/arm_compute/runtime/CL/functions/CLAccumulate.h index f465ab3c46..b47f0c0e4a 100644 --- a/arm_compute/runtime/CL/functions/CLAccumulate.h +++ b/arm_compute/runtime/CL/functions/CLAccumulate.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -42,6 +42,13 @@ public: * @param[out] accum Destination tensor. Data types supported: S16. */ void configure(const ICLTensor *input, ICLTensor *accum); + /** Set the input and accumulation tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] accum Destination tensor. Data types supported: S16. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *accum); }; /** Basic function to run @ref CLAccumulateWeightedKernel */ @@ -55,6 +62,14 @@ public: * @param[in,out] accum Accumulated tensor. Data types supported: U8. */ void configure(const ICLTensor *input, float alpha, ICLTensor *accum); + /** Set the input and accumulation tensors, and the scale value. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] alpha The input scalar value with a value input the range of [0, 1.0]. Data types supported: F32. + * @param[in,out] accum Accumulated tensor. Data types supported: U8. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, float alpha, ICLTensor *accum); }; /** Basic function to run @ref CLAccumulateSquaredKernel */ @@ -68,6 +83,14 @@ public: * @param[in,out] accum Accumulated tensor. Data types supported: S16. */ void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum); + /** Set the input and accumulation tensors and the shift value. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] shift The input with a value input the range of [0, 15]. Data types supported: U32. + * @param[in,out] accum Accumulated tensor. Data types supported: S16. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, uint32_t shift, ICLTensor *accum); }; } #endif /*ARM_COMPUTE_CLACCUMULATE_H */ diff --git a/arm_compute/runtime/CL/functions/CLActivationLayer.h b/arm_compute/runtime/CL/functions/CLActivationLayer.h index 09f5d2bf58..fbb34e5fb9 100644 --- a/arm_compute/runtime/CL/functions/CLActivationLayer.h +++ b/arm_compute/runtime/CL/functions/CLActivationLayer.h @@ -62,6 +62,17 @@ public: * @param[in] act_info Activation layer parameters. 
*/ void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info); + /** Set the input and output tensor. + * + * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] act_info Activation layer parameters. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info); /** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayer * * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result diff --git a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h index a26fcfda56..b0d29bcefe 100644 --- a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h +++ b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h @@ -61,6 +61,15 @@ public: * @param[in] op Reduction operation to perform. Operations supported: ARG_IDX_MAX, ARG_IDX_MIN */ void configure(const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input source tensor. Data types supported: QASYMM8/F16/F32. + * @param[in] axis Axis to find max/min index. + * @param[out] output Output source tensor. Data types supported: U32/S32. + * @param[in] op Reduction operation to perform. 
Operations supported: ARG_IDX_MAX, ARG_IDX_MIN + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op); /** Static function to check if given info will lead to a valid configuration of @ref CLArgMinMaxLayer * * @param[in] input Input source tensor info. Data types supported: QASYMM8/F16/F32. diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h index 7cd4d164d8..a211ea6b04 100644 --- a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -61,6 +61,25 @@ public: */ void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, const ICLTensor *gamma = nullptr, float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); + /** Set the input and output tensors. + * + * @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. + * 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 
1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input + * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input + * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, + const ICLTensor *gamma = nullptr, + float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLBatchNormalizationLayer * * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result. diff --git a/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h b/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h index b98702819b..6edb4641fe 100644 --- a/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h +++ b/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h @@ -46,6 +46,14 @@ public: * @param[out] output Tensor output. Data types supported: same as @p input */ void configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. 
Data types supported: S32 + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output); /** Set the input and output tensors. (Static block shape). * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. @@ -54,6 +62,15 @@ public: * @param[out] output Tensor output. Data types supported: same as @p input */ void configure(const ICLTensor *input, int32_t block_shape_x, int32_t block_shape_y, ICLTensor *output); + /** Set the input and output tensors. (Static block shape). + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, int32_t block_shape_x, int32_t block_shape_y, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayer * * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. diff --git a/arm_compute/runtime/CL/functions/CLBitwiseAnd.h b/arm_compute/runtime/CL/functions/CLBitwiseAnd.h index 77907cc08b..1faded04fe 100644 --- a/arm_compute/runtime/CL/functions/CLBitwiseAnd.h +++ b/arm_compute/runtime/CL/functions/CLBitwiseAnd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -45,6 +45,14 @@ public: * @param[out] output Output tensor. Data types supported: U8. 
*/ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + /** Initialize the function + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Input tensor. Data types supported: U8. + * @param[in] input2 Input tensor. Data types supported: U8. + * @param[out] output Output tensor. Data types supported: U8. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); }; } #endif /* ARM_COMPUTE_CLBITWISEAND_H */ diff --git a/arm_compute/runtime/CL/functions/CLBitwiseNot.h b/arm_compute/runtime/CL/functions/CLBitwiseNot.h index b5c7cfe5fc..c9460555dd 100644 --- a/arm_compute/runtime/CL/functions/CLBitwiseNot.h +++ b/arm_compute/runtime/CL/functions/CLBitwiseNot.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -44,6 +44,13 @@ public: * @param[out] output Output tensor. Data types supported: U8. */ void configure(const ICLTensor *input, ICLTensor *output); + /** Initialize the function + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: U8. + * @param[out] output Output tensor. Data types supported: U8. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); }; } #endif /* ARM_COMPUTE_CLBITWISENOT_H */ diff --git a/arm_compute/runtime/CL/functions/CLBitwiseOr.h b/arm_compute/runtime/CL/functions/CLBitwiseOr.h index 5957c3f6a8..4fb93cc8a2 100644 --- a/arm_compute/runtime/CL/functions/CLBitwiseOr.h +++ b/arm_compute/runtime/CL/functions/CLBitwiseOr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -45,6 +45,14 @@ public: * @param[out] output Output tensor. Data types supported: U8. 
*/ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + /** Initialize the function + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Input tensor. Data types supported: U8. + * @param[in] input2 Input tensor. Data types supported: U8. + * @param[out] output Output tensor. Data types supported: U8. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); }; } #endif /* ARM_COMPUTE_CLBITWISEOR_H */ diff --git a/arm_compute/runtime/CL/functions/CLBitwiseXor.h b/arm_compute/runtime/CL/functions/CLBitwiseXor.h index a4e864c0aa..6caa013607 100644 --- a/arm_compute/runtime/CL/functions/CLBitwiseXor.h +++ b/arm_compute/runtime/CL/functions/CLBitwiseXor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -45,6 +45,14 @@ public: * @param[out] output Output tensor. Data types supported: U8. */ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + /** Initialize the function + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Input tensor. Data types supported: U8. + * @param[in] input2 Input tensor. Data types supported: U8. + * @param[out] output Output tensor. Data types supported: U8. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); }; } #endif /* ARM_COMPUTE_CLBITWISEXOR_H */ diff --git a/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h b/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h index 3e11781827..b09359dfc2 100644 --- a/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h +++ b/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -50,6 +50,18 @@ public: * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. */ void configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. + * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input + * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. + * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input + * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. + * + * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLBoundingBoxTransform * diff --git a/arm_compute/runtime/CL/functions/CLBox3x3.h b/arm_compute/runtime/CL/functions/CLBox3x3.h index 3fb18e3270..a4cf4d296b 100644 --- a/arm_compute/runtime/CL/functions/CLBox3x3.h +++ b/arm_compute/runtime/CL/functions/CLBox3x3.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -50,6 +50,15 @@ public: * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialise the function's source, destinations and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); }; } #endif /*ARM_COMPUTE_CLBOX3X3_H */ diff --git a/arm_compute/runtime/CL/functions/CLCannyEdge.h b/arm_compute/runtime/CL/functions/CLCannyEdge.h index 1a5676795f..2729d241a9 100644 --- a/arm_compute/runtime/CL/functions/CLCannyEdge.h +++ b/arm_compute/runtime/CL/functions/CLCannyEdge.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -69,6 +69,20 @@ public: */ void configure(ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: U8. 
+ * @param[in] upper_thr Upper threshold used for the hysteresis. + * @param[in] lower_thr Lower threshold used for the hysteresis. + * @param[in] gradient_size Gradient size (3, 5 or 7). + * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, + uint8_t constant_border_value = 0); // Inherited methods overridden: virtual void run() override; diff --git a/arm_compute/runtime/CL/functions/CLCast.h b/arm_compute/runtime/CL/functions/CLCast.h index 4cb1fe0bb5..6a1835c73a 100644 --- a/arm_compute/runtime/CL/functions/CLCast.h +++ b/arm_compute/runtime/CL/functions/CLCast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -56,6 +56,26 @@ public: * @param[in] policy Conversion policy. */ void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy); + /** Initialize the function's source, destination + * + * Input data type must be different than output data type. + * + * Valid conversions Input -> Output : + * + * - U8 -> S8, U16, S16, U32, S32, F16, F32 + * - U16 -> U8, S8, S16, U32, S32, F16, F32 + * - S16 -> U8, S8, U16, U32, S32, F16, F32 + * - U32 -> U8, S8, U16, S16, S32, F16, F32 + * - S32 -> U8, S8, U16, S16, U32, F16, F32 + * - F16 -> U8, S8, U16, S16, U32, F32 + * - F32 -> U8, S8, U16, S16, U32, F16 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[out] output The output tensor. 
Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[in] policy Conversion policy. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy); /** Static function to check if given info will lead to a valid configuration of @ref CLCast * * @param[in] input Source tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. diff --git a/arm_compute/runtime/CL/functions/CLChannelCombine.h b/arm_compute/runtime/CL/functions/CLChannelCombine.h index 25f31d86d1..474830d7af 100644 --- a/arm_compute/runtime/CL/functions/CLChannelCombine.h +++ b/arm_compute/runtime/CL/functions/CLChannelCombine.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -45,6 +45,16 @@ public: * @param[out] output The single planar output tensor. */ void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output); + /** Initialize function's inputs and outputs. + * + * @param[in] compile_context The compile context to be used. + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format. + * @param[out] output The single planar output tensor. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output); /** Initialize function's inputs and outputs. * * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. @@ -53,6 +63,15 @@ public: * @param[out] output The multi planar output image. 
*/ void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output); + /** Initialize function's inputs and outputs. + * + * @param[in] compile_context The compile context to be used. + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[out] output The multi planar output image. + */ + void configure(const CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output); }; } #endif /*ARM_COMPUTE_CLCHANNELCOMBINE_H*/ diff --git a/arm_compute/runtime/CL/functions/CLChannelExtract.h b/arm_compute/runtime/CL/functions/CLChannelExtract.h index 77d84b968c..aa25516e18 100644 --- a/arm_compute/runtime/CL/functions/CLChannelExtract.h +++ b/arm_compute/runtime/CL/functions/CLChannelExtract.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -44,6 +44,14 @@ public: * @param[out] output The extracted channel. Must be of U8 format. */ void configure(const ICLTensor *input, Channel channel, ICLTensor *output); + /** Initialize the function's source, destination + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to extract the channel from. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 + * @param[in] channel The channel to extract. + * @param[out] output The extracted channel. Must be of U8 format. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, Channel channel, ICLTensor *output); /** Initialize the function's source, destination * * @param[in] input The multi-planar input image to extract channel from. 
Formats supported: NV12/NV21/IYUV/YUV444 @@ -51,6 +59,14 @@ public: * @param[out] output The extracted 2D channel. Must be of U8 format. */ void configure(const ICLMultiImage *input, Channel channel, ICLImage *output); + /** Initialize the function's source, destination + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The multi-planar input image to extract channel from. Formats supported: NV12/NV21/IYUV/YUV444 + * @param[in] channel The channel to extract. + * @param[out] output The extracted 2D channel. Must be of U8 format. + */ + void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, Channel channel, ICLImage *output); }; } #endif /*ARM_COMPUTE_CLCHANNELEXTRACT_H*/ diff --git a/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h b/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h index 6e30bd3ebd..183a2f1ea6 100644 --- a/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h +++ b/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h @@ -46,6 +46,14 @@ public: * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. */ void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_groups); + /** Initialize the function + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: Same as @p input + * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int num_groups); /** Static function to check if given info will lead to a valid configuration of @ref CLChannelShuffleLayerKernel * * @param[in] input Input tensor info. 
Data types supported: All. diff --git a/arm_compute/runtime/CL/functions/CLColorConvert.h b/arm_compute/runtime/CL/functions/CLColorConvert.h index 1a3bea9cd3..8721e8afa1 100644 --- a/arm_compute/runtime/CL/functions/CLColorConvert.h +++ b/arm_compute/runtime/CL/functions/CLColorConvert.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -47,24 +47,54 @@ public: * U8 (if the formats of @p input is RGB888) */ void configure(const ICLTensor *input, ICLTensor *output); + /** Initialize the function's source, destination + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 + * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), + * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/), + * U8 (if the formats of @p input is RGB888) + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Initialize the function's source, destination * * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888 */ void configure(const ICLMultiImage *input, ICLImage *output); + /** Initialize the function's source, destination + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV + * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888 + */ + void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLImage *output); /** Initialize the function's source, destination * * @param[in] input Single-planar source image. 
Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888) */ void configure(const ICLImage *input, ICLMultiImage *output); + /** Initialize the function's source, destination + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 + * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888) + */ + void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLMultiImage *output); /** Initialize the function's source, destination * * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) */ void configure(const ICLMultiImage *input, ICLMultiImage *output); + /** Initialize the function's source, destination + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV + * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) + */ + void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLMultiImage *output); }; } #endif /* ARM_COMPUTE_CLCOLORCONVERT_H */ diff --git a/arm_compute/runtime/CL/functions/CLComparison.h b/arm_compute/runtime/CL/functions/CLComparison.h index 85dbe7129d..4e681e73a7 100644 --- a/arm_compute/runtime/CL/functions/CLComparison.h +++ b/arm_compute/runtime/CL/functions/CLComparison.h @@ -46,6 +46,17 @@ public: * @param[out] operation Comparison operation to be used. 
*/ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ComparisonOperation operation); + /** Initialise the kernel's inputs and outputs. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Source tensor. Data types supported: All. + * The input1 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[in] input2 Source tensor. Data types supported: Same as @p input1. + * The input2 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[out] operation Comparison operation to be used. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ComparisonOperation operation); /** Static function to check if given info will lead to a valid configuration of @ref CLComparison * * @param[in] input1 Source tensor. Data types supported: All. @@ -75,6 +86,19 @@ public: * @param[out] output Destination tensor. Data types supported: U8. */ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output); + /** Comparison operations used by the class */ + +public: + /** Initialise the kernel's inputs and outputs. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * The input1 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[in] input2 Source tensor. Data types supported: Same as @p input1. + * The input2 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[out] output Destination tensor. Data types supported: U8. 
+ */ + void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLComparison * * @param[in] input1 Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. diff --git a/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h b/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h index a039320c4e..15c5bfeb7d 100644 --- a/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h +++ b/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -47,6 +47,15 @@ public: * */ void configure(const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] anchors Source tensor. Original set of anchors of size (4, A) where A is the number of anchors. Data types supported: F16/F32 + * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. 
Data types supported: Same as @p input + * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo + * + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLComputeAllAnchorsKernel * diff --git a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h index c3d065a2ba..b8e3361e9e 100644 --- a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h +++ b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h @@ -62,6 +62,18 @@ public: */ void configure(std::vector &inputs_vector, ICLTensor *output, size_t axis); void configure(std::vector &inputs_vector, ICLTensor *output, size_t axis); + /** Initialise the kernel's inputs vector and output. + * + * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. + * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All. + * @param[out] output Output tensor. Data types supported: Same as @p input. + * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. 
void configure(const CLCompileContext &compile_context, std::vector<ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis); + void configure(const CLCompileContext &compile_context, std::vector<const ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis); /** Static function to check if given info will lead to a valid configuration of @ref CLConcatenateLayer * * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. @@ -81,7 +93,7 @@ public: private: template <typename TensorType> - void configure_internal(std::vector<TensorType *> &&inputs_vector, ICLTensor *output, size_t axis); + void configure_internal(const CLCompileContext &compile_context, std::vector<TensorType *> &&inputs_vector, ICLTensor *output, size_t axis); template <typename TensorInfoType> static Status validate_internal(const std::vector<TensorInfoType *> &inputs_vector, const ITensorInfo *output, size_t axis);
+ * + * @return A status + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout); /** Static function to check if given info will lead to a valid configuration of @ref CLConvertFullyConnectedWeights * * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All. @@ -96,7 +107,18 @@ public: */ void configure(const ICLTensor *input, const TensorShape &original_input_shape, DataLayout data_layout) { - _func.configure(input, &_output, original_input_shape, data_layout); + configure(CLKernelLibrary::get().get_compile_context(), input, original_input_shape, data_layout); + } + /** Configures the @ref CLConvertFullyConnectedWeights function + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source weights tensor info to convert. Data type supported: All. + * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). + * @param[in] data_layout The data layout the weights have been trained in. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const TensorShape &original_input_shape, DataLayout data_layout) + { + _func.configure(compile_context, input, &_output, original_input_shape, data_layout); } private: diff --git a/arm_compute/runtime/CL/functions/CLConvolution.h b/arm_compute/runtime/CL/functions/CLConvolution.h index 43507d7cbc..72ef8ce7b8 100644 --- a/arm_compute/runtime/CL/functions/CLConvolution.h +++ b/arm_compute/runtime/CL/functions/CLConvolution.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -59,6 +59,17 @@ public: * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
*/ void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8 or S16. + * @param[in] conv matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); }; /** Basic function to execute square convolution.Currently it supports 5x5, 7x7, 9x9. This function calls the following OpenCL kernels: @@ -84,6 +95,17 @@ public: * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8 or S16. 
+ * @param[in] conv matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); // Inherited methods overriden: void run() override; @@ -127,6 +149,20 @@ public: * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8 or S16. + * @param[in] conv Matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. + * @param[in] rows Rows of convolution kernel. + * @param[in] cols Columns of convolution kernel. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, + uint8_t constant_border_value = 0); }; } #endif /*ARM_COMPUTE_CLCONVOLUTION_H */ diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h index b52695463a..fff9173210 100644 --- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h @@ -94,6 +94,29 @@ public: */ void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, unsigned int num_groups = 1); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. + * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type. + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. 
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation + * available which may introduce a drop of accuracy as well. Default is false + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, + unsigned int num_groups = 1); /** Static function to check if given info will lead to a valid configuration of @ref CLConvolutionLayer * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], diff --git a/arm_compute/runtime/CL/functions/CLCopy.h b/arm_compute/runtime/CL/functions/CLCopy.h index 9252ac3c57..31b73c33c3 100644 --- a/arm_compute/runtime/CL/functions/CLCopy.h +++ b/arm_compute/runtime/CL/functions/CLCopy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -43,6 +43,14 @@ public: * */ void configure(ICLTensor *input, ICLTensor *output); + /** Initialise the function's source and destination. + * + * @param[in] compile_context The compile context to be used. 
+ * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[out] output Output tensor. Data types supported: Same as @p input. + * + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLCopy * * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. diff --git a/arm_compute/runtime/CL/functions/CLCropResize.h b/arm_compute/runtime/CL/functions/CLCropResize.h index 244e345b03..86df0d46d1 100644 --- a/arm_compute/runtime/CL/functions/CLCropResize.h +++ b/arm_compute/runtime/CL/functions/CLCropResize.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -62,10 +62,10 @@ public: * @note Box indices may be outside of the bounds, in which case @p extrapolation_value is used. * @note Start and end indices of boxes are inclusive. * - * @param[in] input Source tensor containing N batches of 3D images to be cropped. Data type supported: F32 - * @param[in] boxes Tensor containing the boxes used to crop the images. Data type supported: F32 + * @param[in] input Source tensor containing N batches of 3D images to be cropped. Data type supported: : U16/S16/U32/S32/F16/F32 + * @param[in] boxes Tensor containing the boxes used to crop the images. It has to be known before configuration. Data type supported: F32 * @param[in] box_ind One dimensional tensor containing the batch index of the 3D image in @p input that the corresponding - * box in @p boxes will be applied to. Data type supported: F32 + * box in @p boxes will be applied to. It has to be known before configuration. Data type supported: F32 * @param[out] output Destination tensor containing a cropped and resized image for each box in @p boxes. 
Data type supported: F32 * @param[in] crop_size The dimensions that each cropped image will be resized to. * @param[in] method The policy to be used when resizing image. Default is bilinear. @@ -73,6 +73,24 @@ public: */ void configure(const ICLTensor *input, ICLTensor *boxes, ICLTensor *box_ind, ICLTensor *output, Coordinates2D crop_size, InterpolationPolicy method = InterpolationPolicy::BILINEAR, float extrapolation_value = 0); + /** Configure kernel + * + * @note Supported tensor rank: up to 4 + * @note Box indices may be outside of the bounds, in which case @p extrapolation_value is used. + * @note Start and end indices of boxes are inclusive. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor containing N batches of 3D images to be cropped. Data type supported: U16/S16/U32/S32/F16/F32 + * @param[in] boxes Tensor containing the boxes used to crop the images. It has to be known before configuration. Data type supported: F32 + * @param[in] box_ind One dimensional tensor containing the batch index of the 3D image in @p input that the corresponding + * box in @p boxes will be applied to. It has to be known before configuration. Data type supported: F32 + * @param[out] output Destination tensor containing a cropped and resized image for each box in @p boxes. Data type supported: F32 + * @param[in] crop_size The dimensions that each cropped image will be resized to. + * @param[in] method The policy to be used when resizing image. Default is bilinear. + * @param[in] extrapolation_value Value to be used for values outside of the image for cropping and resizing. Default is 0. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *boxes, ICLTensor *box_ind, ICLTensor *output, Coordinates2D crop_size, + InterpolationPolicy method = InterpolationPolicy::BILINEAR, float extrapolation_value = 0); /** Static function to check if given info will lead to a valid configuration of @ref NESlice * @@ -109,6 +127,8 @@ public: std::vector> _copy; std::vector> _crop_results; std::vector> _scaled_results; + + std::vector> _internal_kernels; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CL_CROP_RESIZE_H */ diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h index 78c149d933..c75b586132 100644 --- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h @@ -55,6 +55,19 @@ public: * */ void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info, const WeightsInfo &weights_info = WeightsInfo()); + /** Set the input, weights, biases and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as @p input. + * @param[out] output Output tensor. The output has the same number of dimensions as the @p input. + * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. 
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel. + * + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info, + const WeightsInfo &weights_info = WeightsInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayer * * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h index 5a1009c79f..2d3dde1ea0 100644 --- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h +++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h @@ -64,6 +64,14 @@ public: * @param[in] info Contains padding and policies to be used in the deconvolution. */ void configure(ICLTensor *input, ICLTensor *output, const PadStrideInfo &info); + /** Initialize the function's source, destination, interpolation type and border_mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. Data type supported: All. + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] info Contains padding and policies to be used in the deconvolution. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const PadStrideInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample * * @param[in] input Source tensor info. Data type supported: All. 
diff --git a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h index 1b9476c3a5..910b9eac51 100644 --- a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h +++ b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -57,6 +57,27 @@ public: * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. */ void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift); + /** Initialize the function's source, destination + * + * Input data type must be different than output data type. + * + * Valid conversions Input -> Output : + * + * - U8 -> S8, U16, S16, U32, S32, F16, F32 + * - U16 -> U8, S8, S16, U32, S32, F16, F32 + * - S16 -> U8, S8, U16, U32, S32, F16, F32 + * - U32 -> U8, S8, U16, S16, S32, F16, F32 + * - S32 -> U8, S8, U16, S16, U32, F16, F32 + * - F16 -> U8, S8, U16, S16, U32, F32 + * - F32 -> U8, S8, U16, S16, U32, F16 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[in] policy Conversion policy. + * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift); /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConvertLayer * * @param[in] input Source tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. 
diff --git a/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h b/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h index 0c33ed34be..dbf5898319 100644 --- a/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h +++ b/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h @@ -42,6 +42,14 @@ public: * @param[in] block_shape Block shape value. */ void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[out] output Tensor output. Data types supported: same as @p input + * @param[in] block_shape Block shape value. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape); /** Static function to check if given info will lead to a valid configuration of @ref CLDepthToSpaceLayer. * * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h index 4668e82bab..63c359e68c 100644 --- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h @@ -70,6 +70,22 @@ public: */ void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); + /** Initialize the function's source, destination, weights and convolution information. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. Data type supported: QASYMM8/FP16/FP32. 
Data layout supported: NHWC, NCHW + * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. + * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8. + * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8. + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer * @@ -150,6 +166,22 @@ private: */ void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); + /** Initialize the function's source, destination, conv and border_size. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling). + * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM]. 
+ * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8. + * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3 * @@ -234,6 +266,22 @@ private: */ void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); + /** Initialize the function's source, destination, weights and convolution information. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F32. (Written to only for border filling). + * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. 
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8. + * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerGeneric * @@ -328,6 +376,23 @@ public: ARM_COMPUTE_DEPRECATED_REL_REPLACE(20.02, CLDepthwiseConvolutionLayer) void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); + /** Initialize the function's source, destination, conv and border_size. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling). + * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM]. 
+ * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8. + * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + */ + ARM_COMPUTE_DEPRECATED_REL_REPLACE(20.02, CLDepthwiseConvolutionLayer) + void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3 * diff --git a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h index 48d6ba8435..c0a0fcd988 100644 --- a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h @@ -44,6 +44,14 @@ public: * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32. */ void configure(const ICLTensor *input, ICLTensor *output); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor with at least 3 dimensions. 
The dimensions over the third will be interpreted as batches. + * Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayer * * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. diff --git a/arm_compute/runtime/CL/functions/CLDerivative.h b/arm_compute/runtime/CL/functions/CLDerivative.h index 1155d401ee..5875ceb86d 100644 --- a/arm_compute/runtime/CL/functions/CLDerivative.h +++ b/arm_compute/runtime/CL/functions/CLDerivative.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -54,6 +54,19 @@ public: * */ void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialise the function's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be not NULL. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination tensor. Derivative along the X direction. Data types supported: S16. + * @param[out] output_y (optional) Destination tensor. Derivative along the Y direction. Data types supported: S16. + * @param[in] border_mode Border mode to use + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ * + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); }; } #endif /* ARM_COMPUTE_CLDERIVATIVE_H */ diff --git a/arm_compute/runtime/CL/functions/CLDilate.h b/arm_compute/runtime/CL/functions/CLDilate.h index ceea4567b2..cc84820f9f 100644 --- a/arm_compute/runtime/CL/functions/CLDilate.h +++ b/arm_compute/runtime/CL/functions/CLDilate.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -50,6 +50,15 @@ public: * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialise the kernel's inputs, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input First tensor input. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Output tensor. Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); }; } #endif /*ARM_COMPUTE_CLDILATE_H */ diff --git a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h index 045b1c0c99..0c81ffa460 100644 --- a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h @@ -57,6 +57,22 @@ public: * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
*/ void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. + * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type. + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLDirectConvolutionLayer * * @param[in] input Source tensor. 
3 lower dimensions represent a single input [width, height, IFM], diff --git a/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h index 6632bfce80..1fed460e69 100644 --- a/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h @@ -98,6 +98,21 @@ public: * */ void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &info, const WeightsInfo &weights_info = WeightsInfo()); + /** Set the input, weights, biases and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. + * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. + * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type + * @param[out] output Output tensor. The output has the same number of dimensions as the @p input. + * @param[in] info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. + * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ * + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &info, + const WeightsInfo &weights_info = WeightsInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLDirectDeconvolutionLayer * * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. diff --git a/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h b/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h index e2503f7bdf..19729b61cc 100644 --- a/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h +++ b/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -40,6 +40,13 @@ public: * @param[out] output Output tensor. Data types supported: same as @p input. */ void configure(const ICLTensor *input, ICLTensor *output); + /** Initialize the function + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: F16/F32. + * @param[out] output Output tensor. Data types supported: same as @p input. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLRsqrtLayer * * @param[in] input First tensor input info. Data types supported: F16/F32. @@ -60,6 +67,13 @@ public: * @param[out] output Output tensor. Data types supported: same as @p input. */ void configure(const ICLTensor *input, ICLTensor *output); + /** Initialize the function + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: F16/F32. + * @param[out] output Output tensor. 
Data types supported: same as @p input. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLExpLayer * * @param[in] input First tensor input info. Data types supported: F16/F32. @@ -80,6 +94,13 @@ public: * @param[out] output Output tensor. Data types supported: same as @p input. */ void configure(const ICLTensor *input, ICLTensor *output); + /** Initialize the function + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: F16/F32. + * @param[out] output Output tensor. Data types supported: same as @p input. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLNegLayer * * @param[in] input First tensor input info. Data types supported: F16/F32. @@ -100,6 +121,13 @@ public: * @param[out] output Output tensor. Data types supported: same as @p input. */ void configure(const ICLTensor *input, ICLTensor *output); + /** Initialize the function + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: F16/F32. + * @param[out] output Output tensor. Data types supported: same as @p input. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLSinLayer * * @param[in] input First tensor input info. Data types supported: F16/F32. @@ -120,6 +148,13 @@ public: * @param[out] output Output tensor. Data types supported: same as @p input. */ void configure(const ICLTensor *input, ICLTensor *output); + /** Initialize the function + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. 
Data types supported: F16/F32. + * @param[out] output Output tensor. Data types supported: same as @p input. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLLogLayer * * @param[in] input First tensor input info. Data types supported: F16/F32. @@ -140,6 +175,13 @@ public: * @param[out] output Output tensor. Data types supported: same as @p input. */ void configure(const ICLTensor *input, ICLTensor *output); + /** Initialize the function + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: F16/F32. + * @param[out] output Output tensor. Data types supported: same as @p input. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLAbsLayer * * @param[in] input First tensor input info. Data types supported: F16/F32. @@ -160,6 +202,13 @@ public: * @param[out] output Output tensor. Data types supported: same as @p input. */ void configure(const ICLTensor *input, ICLTensor *output); + /** Initialize the function + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: F16/F32. + * @param[out] output Output tensor. Data types supported: same as @p input. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLRoundLayer * * @param[in] input First tensor input info. Data types supported: F16/F32. 
diff --git a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h index 6d9f3a0e97..8c656ed8bc 100644 --- a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h +++ b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h @@ -50,6 +50,18 @@ public: * @param[in] act_info (Optional) Activation layer information in case of a fused activation. */ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QSYMM16 (only if @p input1 is QSYMM16), S16/F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), QSYMM16 (only if both inputs is QSYMM16), S16/F16/F32. + * @param[in] policy Policy to use to handle overflow. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel for addition * * @param[in] input1 First tensor input info. 
Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32. @@ -82,6 +94,18 @@ public: * @param[in] act_info (Optional) Activation layer information in case of a fused activation. */ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32. + * @param[in] policy Policy to use to handle overflow. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel for subtraction * * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32. @@ -113,6 +137,17 @@ public: * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
*/ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Initialise the kernel's inputs, output. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input1 First tensor input. Data types supported: F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[in, out] input2 Second tensor input. Same as @p input1. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[out] output Output tensor. Data types supported: Same as @p input1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticDivision * * @param[in] input1 First tensor input info. Data types supported: F16/F32. @@ -143,6 +178,17 @@ public: * @param[in] act_info (Optional) Activation layer information in case of a fused activation. */ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[in, out] input2 Second tensor input. 
Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for max * * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32. @@ -173,6 +219,17 @@ public: * @param[in] act_info (Optional) Activation layer information in case of a fused activation. */ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[out] output Output tensor. 
Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for min * * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32. @@ -203,6 +260,17 @@ public: * @param[in] act_info (Optional) Activation layer information in case of a fused activation. */ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
+ */ + void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for squared difference * * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32. @@ -233,6 +301,17 @@ public: * @param[in] act_info (Optional) Activation layer information in case of a fused activation. */ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input1 First tensor input. Data types supported: F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[in, out] input2 Second tensor input. Data types supported: F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[out] output Output tensor. Data types supported:F16/F32. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for power * * @param[in] input1 First tensor input info. Data types supported: F16/F32. 
diff --git a/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h b/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h index 79c18fae9f..d907cfb092 100644 --- a/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h +++ b/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -54,6 +54,13 @@ public: * @param[out] output Output of same data type with equalized brightness and contrast. */ void configure(const ICLImage *input, ICLImage *output); + /** Initialise the kernel's inputs. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input image. Data types supported: U8. + * @param[out] output Output of same data type with equalized brightness and contrast. + */ + void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLImage *output); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/CL/functions/CLErode.h b/arm_compute/runtime/CL/functions/CLErode.h index a438f4e114..57f701cce2 100644 --- a/arm_compute/runtime/CL/functions/CLErode.h +++ b/arm_compute/runtime/CL/functions/CLErode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -50,6 +50,15 @@ public: * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialise the kernel's inputs, output and border mode + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input First tensor input. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Output tensor. Data types supported: U8. 
+ * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); }; } #endif /*ARM_COMPUTE_CLERODE_H */ diff --git a/arm_compute/runtime/CL/functions/CLFFT1D.h b/arm_compute/runtime/CL/functions/CLFFT1D.h index 31e57e13c6..da153225c8 100644 --- a/arm_compute/runtime/CL/functions/CLFFT1D.h +++ b/arm_compute/runtime/CL/functions/CLFFT1D.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -56,6 +56,14 @@ public: * @param[in] config FFT related configuration */ void configure(const ICLTensor *input, ICLTensor *output, const FFT1DInfo &config); + /** Initialise the function's source, destinations and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. + * @param[in] config FFT related configuration + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const FFT1DInfo &config); /** Static function to check if given info will lead to a valid configuration of @ref CLFFT1D. * * @param[in] input Source tensor info. Data types supported: F32. diff --git a/arm_compute/runtime/CL/functions/CLFFT2D.h b/arm_compute/runtime/CL/functions/CLFFT2D.h index d34528b9cf..a113f20f91 100644 --- a/arm_compute/runtime/CL/functions/CLFFT2D.h +++ b/arm_compute/runtime/CL/functions/CLFFT2D.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -53,6 +53,14 @@ public: * @param[in] config FFT related configuration */ void configure(const ICLTensor *input, ICLTensor *output, const FFT2DInfo &config); + /** Initialise the function's source, destinations and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. + * @param[in] config FFT related configuration + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const FFT2DInfo &config); /** Static function to check if given info will lead to a valid configuration of @ref CLFFT2D. * * @param[in] input Source tensor info. Data types supported: F32. diff --git a/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h index 34bb93ab54..740731950e 100644 --- a/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -85,6 +85,23 @@ public: */ void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Set the input and output tensors. + * + * @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: F32. + * @param[in] weights Weights tensor. 
Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLFFTConvolutionLayer * * @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout diff --git a/arm_compute/runtime/CL/functions/CLFastCorners.h b/arm_compute/runtime/CL/functions/CLFastCorners.h index 2a0e0104b8..1dc87d6a38 100644 --- a/arm_compute/runtime/CL/functions/CLFastCorners.h +++ b/arm_compute/runtime/CL/functions/CLFastCorners.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -71,6 +71,19 @@ public: */ void configure(const ICLImage *input, float threshold, bool nonmax_suppression, ICLKeyPointArray *corners, unsigned int *num_corners, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source image. Data types supported: U8. 
+ * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. + * @param[in] nonmax_suppression If true, non-maximum suppression is applied to detected corners before being placed in the array. + * @param[out] corners Array of keypoints to store the results. + * @param[in,out] num_corners Record number of corners in the array + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(const CLCompileContext &compile_context, const ICLImage *input, float threshold, bool nonmax_suppression, ICLKeyPointArray *corners, unsigned int *num_corners, + BorderMode border_mode, uint8_t constant_border_value = 0); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/CL/functions/CLFill.h b/arm_compute/runtime/CL/functions/CLFill.h index c4ba257753..bb1216054f 100644 --- a/arm_compute/runtime/CL/functions/CLFill.h +++ b/arm_compute/runtime/CL/functions/CLFill.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -42,6 +42,13 @@ public: * @param[in] constant_value Constant value to use to fill tensor. */ void configure(ICLTensor *tensor, PixelValue constant_value); + /** Initialize the function + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] tensor Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] constant_value Constant value to use to fill tensor. 
+ */ + void configure(const CLCompileContext &compile_context, ICLTensor *tensor, PixelValue constant_value); }; } // namespace arm_compute #endif /*ARM_COMPUTE_CLFILL_H */ diff --git a/arm_compute/runtime/CL/functions/CLFillBorder.h b/arm_compute/runtime/CL/functions/CLFillBorder.h index ded79e5cb6..250806b1d7 100644 --- a/arm_compute/runtime/CL/functions/CLFillBorder.h +++ b/arm_compute/runtime/CL/functions/CLFillBorder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -44,6 +44,15 @@ public: * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ void configure(ICLTensor *tensor, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + /** Initialize the function + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] tensor Source tensor. Data types supported: U8/S16 + * @param[in] border_width The border width + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *tensor, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); }; } #endif /*ARM_COMPUTE_FILLBORDER_H */ diff --git a/arm_compute/runtime/CL/functions/CLFlattenLayer.h b/arm_compute/runtime/CL/functions/CLFlattenLayer.h index b9ce236309..98cf49af48 100644 --- a/arm_compute/runtime/CL/functions/CLFlattenLayer.h +++ b/arm_compute/runtime/CL/functions/CLFlattenLayer.h @@ -47,6 +47,15 @@ public: * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input */ void configure(const ICLTensor *input, ICLTensor *output); + /** Initialise the kernel's input and output. 
+ * + * @param[in] compile_context The compile context to be used. + * @param[in] input First input tensor to flatten with at least 3 dimensions. + * The dimensions above the third will be interpreted as batches. Data types supported: All. + * @param[out] output Output tensor with shape [w*h*d, input_batches] where: + * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLFlattenLayer * * @param[in] input First input tensor to flatten with at least 3 dimensions. diff --git a/arm_compute/runtime/CL/functions/CLFloor.h b/arm_compute/runtime/CL/functions/CLFloor.h index c4a893fdeb..2844a5642b 100644 --- a/arm_compute/runtime/CL/functions/CLFloor.h +++ b/arm_compute/runtime/CL/functions/CLFloor.h @@ -48,7 +48,7 @@ public: * @param[in] input Source tensor. Data type supported: F16/F32. * @param[out] output Destination tensor. Same as @p input */ - void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLFloor * * @param[in] input Source tensor info. Data type supported: F16/F32. diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h index cbd28603fc..188117f674 100644 --- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h +++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h @@ -52,6 +52,13 @@ public: * @param[out] output Destination tensor which stores the transposed input tensor. Data type supported: Same as @p input. 
*/ void configure(const ICLTensor *input, ICLTensor *output); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor which stores the transposed input tensor. Data type supported: Same as @p input. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLFullyConnectedLayerReshapeWeights * * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. @@ -100,7 +107,16 @@ public: */ void configure(const ICLTensor *input) { - _func.configure(input, &_output); + configure(CLKernelLibrary::get().get_compile_context(), input); + } + /** Configures the @ref CLFullyConnectedLayerReshapeWeights function + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input) + { + _func.configure(compile_context, input, &_output); } private: @@ -147,6 +163,23 @@ public: */ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo()); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor. The weights must be 2 dimensional. + * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions. 
+ * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension. + * Data type supported: Same as @p input. + * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p input. + * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix multiplication between: + * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer + * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer. + * Data type supported: Same as @p input. + * @param[in] fc_info (Optional) Fully connected layer additional info + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, + FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLFullyConnectedLayer * * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. 
@@ -171,9 +204,9 @@ public: void prepare() override; private: - void configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info); - void configure_conv_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info); - void configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info); + void configure_fc_fc(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info); + void configure_conv_fc(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info); + void configure_mm(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info); MemoryGroup _memory_group; IWeightsManager *_weights_manager; diff --git a/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h b/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h index 650d2e528b..9057440fc6 100644 --- a/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h +++ b/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -67,6 +67,25 @@ public: void configure(const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias, const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr, float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC + * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights + * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights + * @param[out] fused_weights Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights + * @param[out] fused_bias Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights + * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights + * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights + * @note if nullptr, bn_beta is set to 0.0 + * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights + * @note if nullptr, bn_gamma is set to 1.0 + * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. + * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to Convolution. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias, + const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr, + float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); /** Static function to check if given info will lead to a valid configuration of @ref CLFuseBatchNormalization * * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h index 7a4f12043e..f5588112ae 100644 --- a/arm_compute/runtime/CL/functions/CLGEMM.h +++ b/arm_compute/runtime/CL/functions/CLGEMM.h @@ -79,7 +79,18 @@ public: */ void configure(const ICLTensor *input, GEMMRHSMatrixInfo info) { - _kernel.configure(input, &_output, info); + configure(CLKernelLibrary::get().get_compile_context(), input, info); + } + + /** Configures the @ref CLGEMMReshapeRHSMatrixKernel kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] info RHS matrix information to be used for reshaping. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, GEMMRHSMatrixInfo info) + { + _kernel.configure(compile_context, input, &_output, info); } private: @@ -134,6 +145,26 @@ public: * in case matrix A and matrix B have been already transformed. */ void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo()); + /** Initialise the kernel's inputs and output + * + * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. 
+ * + * @note All tensors must have the same data type. + * + * @note Whilst the first input tensor can be a vector, the second input tensor must be at least a matrix + * + * @param[in] compile_context The compile context to be used. + * @param[in] a First input tensor (Matrix or Vector A). Data types supported: F16/F32 + * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a. + * @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a. + * @param[out] output Output tensor. Data type supported: same as @p a + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of matrix C + * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and + * if the reshape of matrix B should happen only for the first run. GEMMInfo also contains information about the reshaping + * in case matrix A and matrix B have been already transformed. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMM. * * @param[in] a First input tensor info (Matrix or Vector A). 
Data types supported: F16/F32 @@ -156,10 +187,11 @@ public: private: static CLGEMMKernelType select_gemm_kernel(unsigned int m, unsigned int n, unsigned int k, DataType data_type, bool reshape_b_only_on_first_run); - void configure_native_v1(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info); - void configure_reshaped_v1(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info); - void configure_reshaped(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info); - void configure_reshaped_only_rhs(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info); + void configure_native_v1(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info); + void configure_reshaped_v1(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info); + void configure_reshaped_v2(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info); + void configure_reshaped_only_rhs(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, + const GEMMInfo &gemm_info); static Status validate_native_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); static Status validate_reshaped_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float 
beta, const GEMMInfo &gemm_info); diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h index 4952029c9d..6d1181eefe 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h @@ -62,6 +62,16 @@ public: * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout */ void configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights. + * @param[out] output Destination tensor. Data types supported: Same as @p weights. + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1); /** Static function to check if given info will lead to a valid configuration of @ref CLConvolutionLayerReshapeWeights * * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. @@ -93,10 +103,21 @@ public: * @param[in] num_groups Number of groups when performing a grouped convolution. 
*/ void configure(const ICLTensor *input, const ICLTensor *biases, unsigned int num_groups) + { + configure(CLKernelLibrary::get().get_compile_context(), input, biases, num_groups); + } + /** Configures the @ref CLConvolutionLayerReshapeWeights function + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] biases Biases tensor. Data type supported: Same as @p input. + * @param[in] num_groups Number of groups when performing a grouped convolution. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *biases, unsigned int num_groups) { _bias_bit = (biases != nullptr) ? 1 : 0; _num_groups = num_groups; - _func.configure(input, biases, &_output, num_groups); + _func.configure(compile_context, input, biases, &_output, num_groups); } //Inherited method override @@ -178,6 +199,28 @@ public: */ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: QASYMM8/F16/F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. 
+ * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type. + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. If this is not part of the fully connected layer the weights + * tensor has also been transposed with CLGEMMReshapeRHSMatrixKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer. * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -208,6 +251,7 @@ public: private: /** Configures the appropriate matrix multiply routine * + * @param[in] compile_context The compile context to be used. * @param[in] input Input tensor. Data types supported: QASYMM8/F16/F32. * @param[in] weights Weights tensor. Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8. 
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. @@ -218,8 +262,9 @@ private: * @param[in] gemm_3d_depth Depth of GEMM 3D * @param[in] act_info Activation to apply after the matrix multiplication */ - void configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const GEMMLowpOutputStageInfo &gemmlowp_output_stage, int gemm_3d_depth, - const ActivationLayerInfo &act_info); + void configure_mm(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, + const GEMMLowpOutputStageInfo &gemmlowp_output_stage, + int gemm_3d_depth, const ActivationLayerInfo &act_info); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer matrix multiply routines * * @param[in] input Input tensor info. Data types supported: QASYMM8/F16/F32. diff --git a/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h index 01687b69ec..d8710a461f 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h @@ -99,6 +99,17 @@ public: * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This function supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported. */ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info); + /** Set the input, weights, biases and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. 
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. Data layout supported: NHWC + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input. Data layout supported: same as @p input. + * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as @p input. Data layout supported: same as @p input. + * @param[out] output Output tensor. The output has the same number of dimensions as the @p input. Data layout supported: same as @p input. + * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This function supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info); /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayer * * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h index 1d7013d328..6ac3cefb76 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h +++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h @@ -72,6 +72,25 @@ public: * if the reshape of matrix B should be executed only for the first run */ void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info = GEMMInfo()); + /** Initialise the kernel's inputs, output + * + * @note GEMMLowp: low precision GEMM kernel. 
[A * B + C] + * This kernel performs the following computations: + * + * -# Convert a values from QASYMM8 to int32 and add a_offset to each of them. + * -# Convert b values from QASYMM8 to int32 and add b_offset to each of them. + * -# Compute the matrix product of the resulting a * b in int32. + * -# Quantize to uint8 if gemm_info.gemmlowp_output_stage != NONE + * + * @param[in] compile_context The compile context to be used. + * @param[in] a First input tensor (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED. + * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a + * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported: S32 + * @param[out] output Output tensor. Data type supported: S32 or QASYMM8/QASYMM8_SIGNED if gemm_info.gemmlowp_output_stage != NONE + * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and + * if the reshape of matrix B should be executed only for the first run + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info = GEMMInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyCore * * @param[in] a First input tensor info (Matrix A). Data type supported: QASYMM8. 
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h index 4c11e51950..06cb759b16 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h +++ b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h @@ -75,6 +75,23 @@ public: ARM_COMPUTE_DEPRECATED_REL(20.05) void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_offset, int result_mult_int, int result_shift, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max()); + /** Initialise the kernel's inputs, output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QASYMM8 + * @param[in] result_offset Offset to be added to each element of the input matrix + * @param[in] result_mult_int Value to be multiplied to each element of the input matrix once the result_offset has been added + * @param[in] result_shift Number of bits to shift right the result before converting back to QASYMM8 + * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer. + * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, + * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer. 
+ */ + ARM_COMPUTE_DEPRECATED_REL(20.05) + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_offset, int result_mult_int, int result_shift, + int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max()); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8Scale * * @param[in] input Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32 @@ -137,6 +154,23 @@ public: */ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max()); + /** Initialise the kernel's inputs, output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QASYMM8 + * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added + * @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication + * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8 + * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer. 
+ * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, + * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, + int result_offset_after_shift, + int min = std::numeric_limits::lowest(), int max = std::numeric_limits::max()); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint * * @param[in] input Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32 @@ -198,6 +232,23 @@ public: */ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = std::numeric_limits::lowest(), int max = std::numeric_limits::max()); + /** Initialise the kernel's inputs, output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. 
Data type supported: QASYMM8_SIGNED + * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added + * @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication + * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED + * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer. + * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED. + * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, + int result_offset_after_shift, + int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max()); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint * * @param[in] input Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32 @@ -240,6 +291,23 @@ public: ARM_COMPUTE_DEPRECATED_REL(20.05) void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, float multiplier, int offset, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max()); + /** Initialise the kernel's inputs, output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. 
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QASYMM8 + * @param[in] multiplier Float multiplier to be multiplied to each element of the input matrix + * @param[in] offset Offset to be applied to result before converting it back to QASYMM8 + * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer. + * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, + * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer. + */ + ARM_COMPUTE_DEPRECATED_REL(20.05) + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, float multiplier, int offset, + int min = std::numeric_limits::lowest(), + int max = std::numeric_limits::max()); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint * * @param[in] input Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32 @@ -300,6 +368,21 @@ public: */ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = std::numeric_limits::lowest(), int max = std::numeric_limits::max()); + /** Initialise the kernel's inputs, output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. 
Data type supported: QSYMM16 + * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added + * @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication + * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer. + * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16. + * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, + int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max()); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint * * @param[in] input Input tensor info. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32 @@ -336,6 +419,16 @@ public: * @param[in] info GEMMLowp output stage metadata. */ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info); + /** Initialise the kernel's inputs, output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] info GEMMLowp output stage metadata. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel * * @param[in] input Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32 diff --git a/arm_compute/runtime/CL/functions/CLGather.h b/arm_compute/runtime/CL/functions/CLGather.h index 71843aa42a..dcd9efc6e0 100644 --- a/arm_compute/runtime/CL/functions/CLGather.h +++ b/arm_compute/runtime/CL/functions/CLGather.h @@ -43,6 +43,15 @@ public: * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0 */ void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0); + /** Initialise the kernel's inputs and outputs + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All. + * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]) + * @param[out] output Destination tensor. Data type supported: Same as @p input + * @param[in] axis (Optional) The axis in @p input to gather @p indices from. 
Defaults to 0 + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0); /** Static function to check if given info will lead to a valid configuration of @ref CLGatherKernel * diff --git a/arm_compute/runtime/CL/functions/CLGaussian3x3.h b/arm_compute/runtime/CL/functions/CLGaussian3x3.h index 2caf6c9d74..f1906cde92 100644 --- a/arm_compute/runtime/CL/functions/CLGaussian3x3.h +++ b/arm_compute/runtime/CL/functions/CLGaussian3x3.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -50,6 +50,15 @@ public: * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialise the function's source, destinations and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); }; } #endif /*ARM_COMPUTE_CLGAUSSIAN3X3_H */ diff --git a/arm_compute/runtime/CL/functions/CLGaussian5x5.h b/arm_compute/runtime/CL/functions/CLGaussian5x5.h index 5d121a4488..d4ed772342 100644 --- a/arm_compute/runtime/CL/functions/CLGaussian5x5.h +++ b/arm_compute/runtime/CL/functions/CLGaussian5x5.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -62,6 +62,15 @@ public: * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialise the function's source, destinations and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/CL/functions/CLGaussianPyramid.h b/arm_compute/runtime/CL/functions/CLGaussianPyramid.h index aa90a5d4e3..a75a4d1028 100644 --- a/arm_compute/runtime/CL/functions/CLGaussianPyramid.h +++ b/arm_compute/runtime/CL/functions/CLGaussianPyramid.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -65,6 +65,16 @@ public: * */ virtual void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value = 0) = 0; + /** Initialise the function's source, destinations and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] pyramid Destination pyramid tensors, Data types supported at each level: U8. + * @param[in] border_mode Border mode to use. 
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + virtual void configure(const CLCompileContext &compile_context, ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value = 0) = 0; protected: ICLTensor *_input; @@ -86,6 +96,7 @@ public: // Inherited methods overridden: void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; + void configure(const CLCompileContext &compile_context, ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; void run() override; private: @@ -109,6 +120,7 @@ public: // Inherited methods overridden: void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; + void configure(const CLCompileContext &compile_context, ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; void run() override; private: diff --git a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h index fb6967f7e5..91b30fabcb 100644 --- a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h +++ b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h @@ -85,6 +85,24 @@ public: */ void configure(const ICLTensor *scores, const ICLTensor *deltas, const ICLTensor *anchors, ICLTensor *proposals, ICLTensor *scores_out, ICLTensor *num_valid_proposals, const GenerateProposalsInfo &info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] scores Scores from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors. + * Data types supported: QASYMM8/F16/F32 + * @param[in] deltas Bounding box deltas from convolution layer of size (W, H, 4*A). 
Data types supported: Same as @p scores + * @param[in] anchors Anchors tensor of size (4, A). Data types supported: QSYMM16 with scale of 0.125 if @p scores is QASYMM8, otherwise same as @p scores + * @param[out] proposals Box proposals output tensor of size (5, W*H*A). + * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p scores is QASYMM8, otherwise same as @p scores + * @param[out] scores_out Box scores output tensor of size (W*H*A). Data types supported: Same as @p scores + * @param[out] num_valid_proposals Scalar output tensor which says which of the first proposals are valid. Data types supported: U32 + * @param[in] info Contains GenerateProposals operation information described in @ref GenerateProposalsInfo + * + * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the @ref GenerateProposalsInfo struct. + * @note Proposals contains all the proposals. Of those, only the first num_valid_proposals are valid. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *scores, const ICLTensor *deltas, const ICLTensor *anchors, ICLTensor *proposals, ICLTensor *scores_out, + ICLTensor *num_valid_proposals, const GenerateProposalsInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLGenerateProposalsLayer * diff --git a/arm_compute/runtime/CL/functions/CLHOGDescriptor.h b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h index 3214e8c3f1..71280c898a 100644 --- a/arm_compute/runtime/CL/functions/CLHOGDescriptor.h +++ b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -59,6 +59,17 @@ public: * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
*/ void configure(ICLTensor *input, ICLTensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialise the function's source, destination, HOG data-object and border mode + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Input tensor. Data type supported: U8 + * (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Output tensor which stores the HOG descriptor. DataType supported: F32. The number of channels is equal to the number of histogram bins per block + * @param[in] hog HOG data object which describes the HOG descriptor + * @param[in] border_mode Border mode to use. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value = 0); // Inherited method overridden: void run() override; diff --git a/arm_compute/runtime/CL/functions/CLHOGDetector.h b/arm_compute/runtime/CL/functions/CLHOGDetector.h index 6703de9f35..c2bdc15b35 100644 --- a/arm_compute/runtime/CL/functions/CLHOGDetector.h +++ b/arm_compute/runtime/CL/functions/CLHOGDetector.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -64,6 +64,22 @@ public: * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to */ void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, size_t idx_class = 0); + /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class + * + * @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is caller's responsibility to clear it. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. It is the output of @ref CLHOGDescriptor. Data type supported: F32 + * @param[in] hog HOG data-object that describes the HOG descriptor + * @param[out] detection_windows Array of @ref DetectionWindow used to store the detected objects + * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. + * It must be multiple of the block stride stored in hog + * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane + * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, + float threshold = 0.0f, + size_t idx_class = 0); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/CL/functions/CLHOGGradient.h b/arm_compute/runtime/CL/functions/CLHOGGradient.h index ec4a187864..450a4a6045 100644 --- a/arm_compute/runtime/CL/functions/CLHOGGradient.h +++ b/arm_compute/runtime/CL/functions/CLHOGGradient.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. 
+ * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -61,6 +61,19 @@ public: * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ void configure(ICLTensor *input, ICLTensor *output_magnitude, ICLTensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialise the function's source, destinations, phase type and border mode + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Input tensor. Data type supported: U8. + * (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_magnitude Output tensor (magnitude). Data type supported: U16. + * @param[out] output_phase Output tensor.(phase). Format supported: U8 + * @param[in] phase_type Type of @ref PhaseType + * @param[in] border_mode Border mode to use + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_magnitude, ICLTensor *output_phase, PhaseType phase_type, BorderMode border_mode, + uint8_t constant_border_value = 0); // Inherited method overridden: void run() override; diff --git a/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h index 424c69dad8..3d22ff69ee 100644 --- a/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h +++ b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -82,8 +82,29 @@ public: * */ void configure(ICLTensor *input, const ICLMultiHOG *multi_hog, ICLDetectionWindowArray *detection_windows, ICLSize2DArray *detection_window_strides, BorderMode border_mode, - uint8_t constant_border_value = 0, - float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f); + uint8_t constant_border_value = 0, float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f); + /** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Input tensor. Data type supported: U8 + * (Written to only for @p border_mode != UNDEFINED) + * @param[in] multi_hog Container of multiple HOG data object. Each HOG data object describes one HOG model to detect. + * This container should store the HOG data-objects in descending or ascending cell_size width order. + * This will help to understand if the HOG descriptor computation can be skipped for some HOG data-objects + * @param[out] detection_windows Array of @ref DetectionWindow used for locating the detected objects + * @param[in] detection_window_strides Array of @ref Size2D used to specify the distance in pixels between 2 consecutive detection windows in x and y directions for each HOG data-object + * The dimension of this array must be the same of multi_hog->num_models() + * The i-th detection_window_stride of this array must be multiple of the block_stride stored in the i-th multi_hog array + * @param[in] border_mode Border mode to use. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane + * @param[in] non_maxima_suppression (Optional) Flag to specify whether the non-maxima suppression is required or not. + * True if the non-maxima suppression stage has to be computed + * @param[in] min_distance (Optional) Radial Euclidean distance to use for the non-maxima suppression stage + * + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLMultiHOG *multi_hog, ICLDetectionWindowArray *detection_windows, ICLSize2DArray *detection_window_strides, + BorderMode border_mode, uint8_t constant_border_value = 0, float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f); // Inherited method overridden: void run() override; diff --git a/arm_compute/runtime/CL/functions/CLHarrisCorners.h b/arm_compute/runtime/CL/functions/CLHarrisCorners.h index 6c89d6dea6..2d0e78b00e 100644 --- a/arm_compute/runtime/CL/functions/CLHarrisCorners.h +++ b/arm_compute/runtime/CL/functions/CLHarrisCorners.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -82,6 +82,23 @@ public: void configure(ICLImage *input, float threshold, float min_dist, float sensitivity, int32_t gradient_size, int32_t block_size, ICLKeyPointArray *corners, BorderMode border_mode, uint8_t constant_border_value = 0, bool use_fp16 = false); + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source image. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[in] threshold Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). + * @param[in] min_dist Radial Euclidean distance for the euclidean distance stage. 
+ * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation + * @param[in] gradient_size The gradient window size to use on the input. The implementation supports 3, 5, and 7 + * @param[in] block_size The block window size used to compute the Harris Corner score. The implementation supports 3, 5, and 7. + * @param[out] corners Array of keypoints to store the results. + * @param[in] border_mode Border mode to use + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used. + */ + void configure(const CLCompileContext &compile_context, ICLImage *input, float threshold, float min_dist, float sensitivity, + int32_t gradient_size, int32_t block_size, ICLKeyPointArray *corners, + BorderMode border_mode, uint8_t constant_border_value = 0, bool use_fp16 = false); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/CL/functions/CLHistogram.h b/arm_compute/runtime/CL/functions/CLHistogram.h index ad389248f7..6d34dd7060 100644 --- a/arm_compute/runtime/CL/functions/CLHistogram.h +++ b/arm_compute/runtime/CL/functions/CLHistogram.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -55,6 +55,13 @@ public: * @param[out] output Output distribution. */ void configure(const ICLImage *input, ICLDistribution1D *output); + /** Initialize the function + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source image. Data types supported: U8 + * @param[out] output Output distribution. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h index ddd4b12eca..4614b90c70 100644 --- a/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h @@ -51,6 +51,18 @@ public: * @param[in] use_mixed_precision (Optional) Use mixed precision in case of FP16 execution */ void configure(ICLTensor *input, ICLTensor *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f, bool use_mixed_precision = true); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization. + * Data types supported: F16/F32. Data layout supported: NHWC, NCHW + * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. + * @param[in] gamma (Optional) The scale scalar value applied to the normalized tensor. Defaults to 1.0 + * @param[in] beta (Optional) The offset scalar value applied to the normalized tensor. Defaults to 0.0 + * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12 + * @param[in] use_mixed_precision (Optional) Use mixed precision in case of FP16 execution + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f, bool use_mixed_precision = true); /** Static function to check if given info will lead to a valid configuration of @ref CLInstanceNormalizationLayer. 
* diff --git a/arm_compute/runtime/CL/functions/CLIntegralImage.h b/arm_compute/runtime/CL/functions/CLIntegralImage.h index 2a452a97a3..1ea189bf33 100644 --- a/arm_compute/runtime/CL/functions/CLIntegralImage.h +++ b/arm_compute/runtime/CL/functions/CLIntegralImage.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -48,6 +48,13 @@ public: * @param[out] output Destination tensor, Data types supported: U32. */ void configure(const ICLTensor *input, ICLTensor *output); + /** Initialise the function's source, destinations and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U32. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h index e200dc758e..91c547b2cc 100644 --- a/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h +++ b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -59,6 +59,15 @@ public: * @param[in] epsilon (Optional) Lower bound value for the normalization. */ void configure(ICLTensor *input, ICLTensor *output, int axis, float epsilon = 1e-12f); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. + * @param[in] axis Axis along which to reduce. Negative values wrap around. 
Maximum supported actual reduction axis : 2 + * @param[in] epsilon (Optional) Lower bound value for the normalization. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, int axis, float epsilon = 1e-12f); /** Static function to check if given info will lead to a valid configuration of @ref CLL2NormalizeLayer. * diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h index a94f239472..a29513aaae 100644 --- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h +++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h @@ -102,6 +102,52 @@ public: const ICLTensor *output_state_in, const ICLTensor *cell_state_in, ICLTensor *scratch_buffer, ICLTensor *output_state_out, ICLTensor *cell_state_out, ICLTensor *output, const LSTMParams &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f); + /** Initialize function's tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32. + * @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input. + * @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input. + * @param[in] input_to_output_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input. + * @param[in] recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input. + * @param[in] recurrent_to_cell_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input. + * @param[in] recurrent_to_output_weights 2D weights tensor with dimensions [output_size, num_units]. 
Data type supported: Same as @p input. + * @param[in] forget_gate_bias 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. + * @param[in] cell_bias 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. + * @param[in] output_gate_bias 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. + * @param[in] output_state_in 2D weights tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input. + * @param[in] cell_state_in 2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input. + * @param[out] scratch_buffer 2D tensor with dimensions [num_units * 4, batch_size] with CIFG or [num_units * 3, batch_size] without CIFG. Data type supported: Same as @p input. + * @param[out] output_state_out 2D weights tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input. + * @param[out] cell_state_out 2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input. + * @param[out] output Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. + * Data types supported: Same as @p input. + * @param[in] lstm_params Weights tensors used in peephole optimization: + * input_to_input_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input. + * recurrent_to_input_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input. + * cell_to_input_weights 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: Same as @p input. + * cell_to_forget_weights 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. + * cell_to_output_weights 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. + * input_gate_bias 1D weights tensor with dimensions [num_units]. 
Data type supported: Same as @p input + * projection_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input. + * projection_bias 1D weights tensor with dimensions [output_size]. Data type supported: Same as @p input. + * input_layer_norm_weights 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. + * forget_layer_norm_weights 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. + * cell_layer_norm_weights 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. + * output_layer_norm_weights 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input. + * @param[in] activation_info Contains activation information described in @ref ActivationLayerInfo. + * @param[in] cell_threshold (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip]. + * If set to 0.0f then clipping is disabled. + * @param[in] projection_threshold (Optional) The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip]. + * If set to 0.0f then clipping is disabled. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, + const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights, + const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights, + const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, + const ICLTensor *output_state_in, const ICLTensor *cell_state_in, + ICLTensor *scratch_buffer, ICLTensor *output_state_out, ICLTensor *cell_state_out, ICLTensor *output, + const LSTMParams &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f); /** Static function to check if given info will lead to a valid configuration of @ref CLLSTMLayer * diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h b/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h index 1d39060088..082fdb4499 100644 --- a/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h +++ b/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -97,6 +97,33 @@ public: const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, ICLTensor *cell_state_in, const ICLTensor *output_state_in, ICLTensor *cell_state_out, ICLTensor *output_state_out); + /** Initialize function's tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8. + * @param[in] input_to_input_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. 
+ * @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. + * @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. + * @param[in] input_to_output_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. + * @param[in] recurrent_to_input_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. + * @param[in] recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. + * @param[in] recurrent_to_cell_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. + * @param[in] recurrent_to_output_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. + * @param[in] input_gate_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. + * @param[in] forget_gate_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. + * @param[in] cell_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. + * @param[in] output_gate_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. + * @param[in] cell_state_in 2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16. + * @param[in] output_state_in 2D tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input. + * @param[out] cell_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16. + * @param[out] output_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].Data types supported: Same as @p input. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, + const ICLTensor *input_to_input_weights, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights, + const ICLTensor *recurrent_to_input_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights, + const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, + ICLTensor *cell_state_in, const ICLTensor *output_state_in, + ICLTensor *cell_state_out, ICLTensor *output_state_out); /** Static function to check if given info will lead to a valid configuration of @ref CLLSTMLayerQuantized * diff --git a/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h b/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h index a407e981da..49a87baaf2 100644 --- a/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h +++ b/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -68,6 +68,19 @@ public: * */ void configure(ICLTensor *input, CLPyramid *pyramid, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value); + /** Initialise the function's source, destinations and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] pyramid Destination pyramid tensors, Data types supported at each level: S16. + * @param[out] output The lowest resolution tensor necessary to reconstruct the input tensor from the pyramid. Data types supported: S16. 
+ * The first two dimensions of this tensor must match the first two dimensions of the tensor in the last level of the pyramid, that is: + * output.width = input.width() / pow(2,pyramid_levels-1) and out.height = in.height() / pow(2,pyramid_levels-1) + * @param[in] border_mode Border mode to use. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, CLPyramid *pyramid, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h b/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h index 3407f46887..2c7afde7de 100644 --- a/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h +++ b/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -77,6 +77,22 @@ public: * */ void configure(const CLPyramid *pyramid, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value); + /** Initialise the function's source, destinations and border mode. + * + * The Output image must have the same size as the first level of the pyramid. + * The Input image must have the same size as the last level of the pyramid. + * + * The idea is to reconstruct the original hi-res image from a low-res representation of it and the laplacian pyramid. + * + * @param[in] compile_context The compile context to be used. + * @param[in] pyramid Laplacian pyramid tensors, Data types supported at each level: S16. + * @param[in] input Source tensor. Data types supported: S16. + * @param[out] output Output tensor. Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. 
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(const CLCompileContext &compile_context, const CLPyramid *pyramid, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h index 1186a449d5..7a43eab478 100644 --- a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h +++ b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -73,6 +73,19 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. */ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: F32. + * @param[in] weights Weights tensor. Weights are 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches]. Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 2D tensor with dimensions [OFM, num_patches]. Data type supported:Same as @p input. + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info); /** Static function to check if given info will lead to a valid configuration of @ref CLLocallyConnectedLayer * * @param[in] input Input tensor info. 3 lower dimensions represent a single input [width, height, IFM], diff --git a/arm_compute/runtime/CL/functions/CLMagnitude.h b/arm_compute/runtime/CL/functions/CLMagnitude.h index 2f5932b5ab..e52ab240e4 100644 --- a/arm_compute/runtime/CL/functions/CLMagnitude.h +++ b/arm_compute/runtime/CL/functions/CLMagnitude.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -43,6 +43,15 @@ public: * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM. */ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, MagnitudeType mag_type = MagnitudeType::L2NORM); + /** Initialise the kernel's inputs. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 First tensor input. Data types supported: S16. + * @param[in] input2 Second tensor input. Data types supported: S16. + * @param[out] output Output tensor. Data types supported: S16. + * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, MagnitudeType mag_type = MagnitudeType::L2NORM); }; } #endif /*ARM_COMPUTE_CLMAGNITUDE_H */ diff --git a/arm_compute/runtime/CL/functions/CLMeanStdDev.h b/arm_compute/runtime/CL/functions/CLMeanStdDev.h index fea1ed194f..561ac04f1d 100644 --- a/arm_compute/runtime/CL/functions/CLMeanStdDev.h +++ b/arm_compute/runtime/CL/functions/CLMeanStdDev.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. 
+ * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -57,6 +57,14 @@ public: * @param[out] stddev (Optional) Output standard deviation of pixel values. */ void configure(ICLImage *input, float *mean, float *stddev = nullptr); + /** Initialise the kernel's inputs and outputs. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Input image. Data types supported: U8/F16/F32. (Written to only for border filling) + * @param[out] mean Output average pixel value. + * @param[out] stddev (Optional) Output standard deviation of pixel values. + */ + void configure(const CLCompileContext &compile_context, ICLImage *input, float *mean, float *stddev = nullptr); /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDev * * @param[in] input Input image. Data types supported: U8/F16/F32. diff --git a/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h index 565f8f3040..e39a5908b8 100644 --- a/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -44,6 +44,16 @@ public: * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. */ void configure(ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f); + /** Initialise the function's input and outputs. + * + * @note If the output tensor is a nullptr, the normalization will be performed in-place. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Input tensor with 2 dimensions. Data types supported: F16/F32. + * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. 
Data type supported: same as @p input + * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f); /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevNormalizationKernel * * @param[in] input Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr, diff --git a/arm_compute/runtime/CL/functions/CLMedian3x3.h b/arm_compute/runtime/CL/functions/CLMedian3x3.h index 3a9a95a5f3..f3bb2832ef 100644 --- a/arm_compute/runtime/CL/functions/CLMedian3x3.h +++ b/arm_compute/runtime/CL/functions/CLMedian3x3.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -50,6 +50,15 @@ public: * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialise the function's source, destinations and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); }; } #endif /*ARM_COMPUTE_CLMEDIAN3X3_H */ diff --git a/arm_compute/runtime/CL/functions/CLMinMaxLocation.h b/arm_compute/runtime/CL/functions/CLMinMaxLocation.h index 30a29f2b8c..e9e3bd910c 100644 --- a/arm_compute/runtime/CL/functions/CLMinMaxLocation.h +++ b/arm_compute/runtime/CL/functions/CLMinMaxLocation.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -66,6 +66,22 @@ public: void configure(const ICLImage *input, void *min, void *max, CLCoordinates2DArray *min_loc = nullptr, CLCoordinates2DArray *max_loc = nullptr, uint32_t *min_count = nullptr, uint32_t *max_count = nullptr); + /** Initialise the kernel's inputs and outputs. + * + * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input image. Data types supported: U8/S16/F32. + * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. + * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. + * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations. + * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations. + * @param[out] min_count (Optional) Number of minimum value encounters. + * @param[out] max_count (Optional) Number of maximum value encounters. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLImage *input, void *min, void *max, + CLCoordinates2DArray *min_loc = nullptr, CLCoordinates2DArray *max_loc = nullptr, + uint32_t *min_count = nullptr, uint32_t *max_count = nullptr); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/CL/functions/CLNonLinearFilter.h b/arm_compute/runtime/CL/functions/CLNonLinearFilter.h index a7c87d35b0..79f73ea370 100644 --- a/arm_compute/runtime/CL/functions/CLNonLinearFilter.h +++ b/arm_compute/runtime/CL/functions/CLNonLinearFilter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -56,6 +56,20 @@ public: */ void configure(ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: U8 + * @param[in] function Non linear function to perform + * @param[in] mask_size Mask size. Supported sizes: 3, 5 + * @param[in] pattern Mask pattern + * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, + BorderMode border_mode, uint8_t constant_border_value = 0); }; } #endif /*ARM_COMPUTE_CLNONLINEARFILTER_H */ diff --git a/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h b/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h index 0859a09bdb..e2c0c4f814 100644 --- a/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h +++ b/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -50,6 +50,18 @@ public: * The implementation supports just 2 border modes: UNDEFINED and CONSTANT */ void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode); + /** Initialise the function's source, destinations and border mode. + * + * @note The implementation supports just 2 border modes: UNDEFINED and CONSTANT + * The constant values used with CONSTANT border mode is 0 + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: U8, F32. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination for the Non-Maxima suppressions 3x3. Data types supported: same as @p input. + * @param[in] border_mode Border mode to use for non-maxima suppression. 
+ * The implementation supports just 2 border modes: UNDEFINED and CONSTANT + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode); }; } #endif /* ARM_COMPUTE_CLNONMAXIMASUPPRESSION3X3_H */ diff --git a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h index d06bf56794..07bb62c7d7 100644 --- a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -58,6 +58,17 @@ public: * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. */ void configure(ICLTensor *input, ICLTensor *output, const NormalizationLayerInfo &norm_info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32 (Written to by the border handler). + * Data layouts supported: NCHW/NHWC. + * @param[out] output Destination tensor. Dimensions, data type and number of channels must match the input ones. + * Data types supported: same as @p input. Data layouts supported: same as @p input. + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const NormalizationLayerInfo &norm_info); /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizationLayer * * @param[in] input Source tensor. 
3 lower dims represent a single input with dimensions [width, height, IFM], diff --git a/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h b/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h index 5fbfdd18b7..5dd3760d3d 100644 --- a/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h +++ b/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h @@ -50,6 +50,17 @@ public: * Data types supported: Same as @p input */ void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels]. + * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination feature tensor. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: Same as @p input + * @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels. + * Data types supported: Same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std); /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizePlanarYUVLayer * * @param[in] input Source tensor info. 3 lower dimensions represent a single input with dimensions [width, height, channels]. diff --git a/arm_compute/runtime/CL/functions/CLOpticalFlow.h b/arm_compute/runtime/CL/functions/CLOpticalFlow.h index 33df175287..12d0583384 100644 --- a/arm_compute/runtime/CL/functions/CLOpticalFlow.h +++ b/arm_compute/runtime/CL/functions/CLOpticalFlow.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited.
* * SPDX-License-Identifier: MIT * @@ -91,6 +91,27 @@ public: const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLKeyPointArray *new_points, Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, bool use_initial_estimate, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialise the function input and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] old_pyramid Pointer to the pyramid for the old tensor. Data types supported U8 + * @param[in] new_pyramid Pointer to the pyramid for the new tensor. Data types supported U8 + * @param[in] old_points Pointer to the IKeyPointArray storing old key points + * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points + * @param[out] new_points Pointer to the IKeyPointArray storing new key points + * @param[in] termination The criteria to terminate the search of each keypoint. + * @param[in] epsilon The error for terminating the algorithm + * @param[in] num_iterations The maximum number of iterations before terminating the algorithm + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used + * @param[in] border_mode The border mode applied at scharr kernel stage + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT + * + */ + void configure(const CLCompileContext &compile_context, const CLPyramid *old_pyramid, const CLPyramid *new_pyramid, + const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLKeyPointArray *new_points, + Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, bool use_initial_estimate, + BorderMode border_mode, uint8_t constant_border_value = 0); // Inherited methods overridden: void
run() override; diff --git a/arm_compute/runtime/CL/functions/CLPReluLayer.h b/arm_compute/runtime/CL/functions/CLPReluLayer.h index 7f8a41238c..74fa86a320 100644 --- a/arm_compute/runtime/CL/functions/CLPReluLayer.h +++ b/arm_compute/runtime/CL/functions/CLPReluLayer.h @@ -47,6 +47,16 @@ public: * @param[out] output Destination tensor. Data type supported: same as @p input */ void configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output); + /** Set the input and output tensor. + * + * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] alpha PRelu layer parameters. Data types supported: same as @p input. + * @param[out] output Destination tensor. Data type supported: same as @p input + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *alpha, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLPReluLayer * * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. diff --git a/arm_compute/runtime/CL/functions/CLPadLayer.h b/arm_compute/runtime/CL/functions/CLPadLayer.h index f020d68c92..82d7205381 100644 --- a/arm_compute/runtime/CL/functions/CLPadLayer.h +++ b/arm_compute/runtime/CL/functions/CLPadLayer.h @@ -63,6 +63,19 @@ public: * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT). */ void configure(ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT); + /** Initialize the function + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Output tensor.
Data type supported: same as @p input + * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] + * specifies the front and the end padding in the i-th dimension. + * @param[in] constant_value (Optional) Constant value to be used for the padding. + * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT, + * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT). + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), + PaddingMode mode = PaddingMode::CONSTANT); /** Static function to check if given info will lead to a valid configuration of @ref CLPadLayer. * diff --git a/arm_compute/runtime/CL/functions/CLPermute.h b/arm_compute/runtime/CL/functions/CLPermute.h index b1705cf4c5..37e651cfbb 100644 --- a/arm_compute/runtime/CL/functions/CLPermute.h +++ b/arm_compute/runtime/CL/functions/CLPermute.h @@ -46,6 +46,16 @@ public: * @param[in] perm Permutation vector */ void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm); + /** Set the input and output tensors. + * + * @note Arbitrary permutation vectors are supported with rank not greater than 4 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to permute. Data types supported: All. + * @param[in] output The output tensor. Data types supported: Same as @p input + * @param[in] perm Permutation vector + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PermutationVector &perm); /** Static function to check if given info will lead to a valid configuration of @ref CLPermute. 
* * @note Arbitrary permutation vectors are supported with rank not greater than 4 diff --git a/arm_compute/runtime/CL/functions/CLPhase.h b/arm_compute/runtime/CL/functions/CLPhase.h index ac8a8670fc..f993906fe2 100644 --- a/arm_compute/runtime/CL/functions/CLPhase.h +++ b/arm_compute/runtime/CL/functions/CLPhase.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -43,6 +43,15 @@ public: * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. */ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, PhaseType phase_type = PhaseType::SIGNED); + /** Initialise the kernel's inputs, output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 First tensor input. Data types supported: S16. + * @param[in] input2 Second tensor input. Data types supported: S16. + * @param[out] output Output tensor. Data types supported: U8. + * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, PhaseType phase_type = PhaseType::SIGNED); }; } #endif /*ARM_COMPUTE_CLPHASE_H */ diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h index 47bb2bf4db..8b0ee70f12 100644 --- a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h +++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h @@ -59,6 +59,22 @@ public: */ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] compile_context The compile context to be used.
+ * @param[in, out] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[in, out] input2 An input tensor. Data types supported: same as @p input1. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[out] output The output tensor, Data types supported: same as @p input1. Note: U8 requires both inputs to be U8. + * @param[in] scale Scale to apply after multiplication. + * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. + * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate + * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale, + ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplication * * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. @@ -98,6 +114,17 @@ public: * @param[in] act_info (Optional) Activation layer information in case of a fused activation. */ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Initialise the kernel's inputs, output. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input1 An input tensor. Data types supported: F32. Number of channels supported: 2. 
+ * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[in, out] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[out] output The output tensor, Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLComplexPixelWiseMultiplication * * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2. diff --git a/arm_compute/runtime/CL/functions/CLPoolingLayer.h b/arm_compute/runtime/CL/functions/CLPoolingLayer.h index 05b35dcee8..7d646ab268 100644 --- a/arm_compute/runtime/CL/functions/CLPoolingLayer.h +++ b/arm_compute/runtime/CL/functions/CLPoolingLayer.h @@ -49,6 +49,15 @@ public: * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32. */ void configure(ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. 
+ * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr); /** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayer * * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. diff --git a/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h b/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h index eea1399552..d39e4112f9 100644 --- a/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h +++ b/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -46,6 +46,15 @@ public: * @param[in] info Prior box layer info. */ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC. + * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1 + * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data types and layouts supported: same as @p input1 + * @param[in] info Prior box layer info. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLPriorBoxLayer * * @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC. 
diff --git a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h index ab34135ff5..72a61f8505 100644 --- a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h +++ b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h @@ -113,6 +113,55 @@ public: ICLTensor *cell_state_out, ICLTensor *output_state_out, const LSTMParams &lstm_params); + /** Initialize function's tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED. + * @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8. + * @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8. + * @param[in] input_to_output_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8. + * @param[in] recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8. + * @param[in] recurrent_to_cell_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8. + * @param[in] recurrent_to_output_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8. + * @param[in] forget_gate_bias 1D weights tensor with dimensions [num_units]. Data type supported: S32. + * @param[in] cell_bias 1D weights tensor with dimensions [num_units]. Data type supported: S32. + * @param[in] output_gate_bias 1D weights tensor with dimensions [num_units]. Data type supported: S32. + * @param[in] cell_state_in 2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16. + * @param[in] output_state_in 2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input. + * @param[out] cell_state_out Destination tensor. 
Output is a 2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16. + * @param[out] output_state_out Destination tensor. Output is a 2D tensor with dimensions [num_units, batch_size].Data types supported: Same as @p input. + * @param[in] lstm_params Weights tensors used in peephole, CIFG and layer normalization optimizations: + * input_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at input gate. + * forget_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at forget gate. + * cell_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at cell gate. + * output_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at output gate. + * hidden_state_zero The zero point of the hidden state. + * hidden_state_scale The scale of the hidden state. + * input_to_input_weights (Optional) 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8. + * recurrent_to_input_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8. + * cell_to_input_weights (Optional) 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: QSYMM16. + * cell_to_forget_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16. + * cell_to_output_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16. + * input_gate_bias (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: S32. + * projection_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8. + * projection_bias (Optional) 1D weights tensor with dimensions [output_size]. S32. + * input_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. 
Data type supported: QSYMM16. + * forget_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16. + * cell_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16. + * output_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16. + * cell_threshold (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip]. + * If set to 0.0 then clipping is disabled. + * projection_threshold (Optional) The clipping threshold for the output from the projection layer, such that values are bound within + * [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, + const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights, + const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights, + const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, + const ICLTensor *cell_state_in, const ICLTensor *output_state_in, + ICLTensor *cell_state_out, ICLTensor *output_state_out, + const LSTMParams &lstm_params); + /** Static function to check if given info will lead to a valid configuration of @ref CLQLSTMLayer * * @param[in] input Source tensor info. Input is a 2D tensor info with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED. @@ -169,19 +218,20 @@ public: private: /** Internal method to configure matrix multiplication plus output stage of each gate. * - * @param[in] mm Matrix multiplication function to use. - * @param[in] outstage Output stage function to use. - * @param[in] gemmlowp_info GEMMLowp metadata to be used by the output stage. 
- * @param[in] mm_input Input tensor to matrix multiplication function. - * @param[in] mm_weights Weights tensor to matrix multiplication function. - * @param[in] bias Bias tensor to matrix multiplication function. - * @param[in] outstage_res Tensor to be used for storing the result of the output stage. - * @param[in] gemmlowp_scale Real multiplier to be used computing multiplier and shift for requantization. - * @param[in] mm_res_info Tensor info to be used to initialize matrix multiplication result tensor. - * @param[in] mm_res_info Tensor info to be used to initialize output stage result tensor. + * @param[in] compile_context The compile context to be used. + * @param[in] mm Matrix multiplication function to use. + * @param[in] outstage Output stage function to use. + * @param[in] gemmlowp_info GEMMLowp metadata to be used by the output stage. + * @param[in] mm_input Input tensor to matrix multiplication function. + * @param[in] mm_weights Weights tensor to matrix multiplication function. + * @param[in] bias Bias tensor to matrix multiplication function. + * @param[in] outstage_res Tensor to be used for storing the result of the output stage. + * @param[in] gemmlowp_scale Real multiplier to be used computing multiplier and shift for requantization. + * @param[in] mm_res_info Tensor info to be used to initialize matrix multiplication result tensor. + * @param[in] outstage_tensor_info Tensor info to be used to initialize output stage result tensor.
* */ - void configure_mm(CLGEMMLowpMatrixMultiplyCore &mm, CLGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info, + void configure_mm(const CLCompileContext &compile_context, CLGEMMLowpMatrixMultiplyCore &mm, CLGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info, const ICLTensor *mm_input, const ICLTensor *mm_weights, const ICLTensor *bias, CLTensor *mm_res, CLTensor *outstage_res, float gemmlowp_scale, const TensorInfo &mm_res_info, const TensorInfo &outstage_tensor_info); diff --git a/arm_compute/runtime/CL/functions/CLQuantizationLayer.h b/arm_compute/runtime/CL/functions/CLQuantizationLayer.h index fbdef53aeb..f59e3b7919 100644 --- a/arm_compute/runtime/CL/functions/CLQuantizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLQuantizationLayer.h @@ -48,6 +48,15 @@ public: * @note Output auto initialization is not supported by this function */ void configure(const ICLTensor *input, ICLTensor *output); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/32. + * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. + * + * @note Output auto initialization is not supported by this function + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLQuantizationLayer * * @param[in] input Input tensor info. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/32. 
diff --git a/arm_compute/runtime/CL/functions/CLRNNLayer.h b/arm_compute/runtime/CL/functions/CLRNNLayer.h index 569e3da89e..0291eb17a9 100644 --- a/arm_compute/runtime/CL/functions/CLRNNLayer.h +++ b/arm_compute/runtime/CL/functions/CLRNNLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -52,6 +52,19 @@ public: * @param[in] info Activation layer parameter. */ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *recurrent_weights, const ICLTensor *bias, ICLTensor *hidden_state, ICLTensor *output, ActivationLayerInfo &info); + /** Initialize the function + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data types supported: F16/F32 + * @param[in] weights Weights tensor of shape [input_size, num_units] that multiplies the input. Data types supported: Same as @p input + * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the current 'state'. Data types supported: Same as @p input + * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same as @p input + * @param[out] output Output tensor of shape [num_units, batch_size]. Data types supported: Same as @p input + * @param[in,out] hidden_state Output tensor of shape [num_units, batch_size]. Data types supported: Same as @p input + * @param[in] info Activation layer parameter. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *recurrent_weights, const ICLTensor *bias, ICLTensor *hidden_state, + ICLTensor *output, ActivationLayerInfo &info); /** Initialize the function * * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. 
Data types supported: F16/F32 diff --git a/arm_compute/runtime/CL/functions/CLROIAlignLayer.h b/arm_compute/runtime/CL/functions/CLROIAlignLayer.h index 7c2c6eb26f..b6defe6c7f 100644 --- a/arm_compute/runtime/CL/functions/CLROIAlignLayer.h +++ b/arm_compute/runtime/CL/functions/CLROIAlignLayer.h @@ -56,6 +56,22 @@ public: * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. */ void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner + * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. + * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); /** Static function to check if given info will lead to a valid configuration of @ref CLROIAlignLayer * * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. 
diff --git a/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h b/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h index 7d0e1da4f8..0376e7847c 100644 --- a/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h +++ b/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -56,6 +56,21 @@ public: * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. */ void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: F16/F32. + * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner + * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16 + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); }; } #endif /* ARM_COMPUTE_CLROIPOOLINGLAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLRange.h b/arm_compute/runtime/CL/functions/CLRange.h index 2cc8376b72..19e11bacd4 100644 --- a/arm_compute/runtime/CL/functions/CLRange.h +++ b/arm_compute/runtime/CL/functions/CLRange.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -47,6 +47,15 @@ public: * @param[in] step The gap between each pair of values in the sequence. Default is 1. */ void configure(ICLTensor *output, float start, float end, float step = 1.f); + /** Initialize the kernel's start, end, step and output tensor. + * + * @param[in] compile_context The compile context to be used. + * @param[out] output Output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] start The starting value of the sequence. + * @param[in] end The ending (not including) value of the sequence. + * @param[in] step The gap between each pair of values in the sequence. Default is 1. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *output, float start, float end, float step = 1.f); /** Static function to check if given info will lead to a valid configuration of @ref CLRange * * @param[in] output Output tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. diff --git a/arm_compute/runtime/CL/functions/CLReduceMean.h b/arm_compute/runtime/CL/functions/CLReduceMean.h index 30000edd62..57ec48d690 100644 --- a/arm_compute/runtime/CL/functions/CLReduceMean.h +++ b/arm_compute/runtime/CL/functions/CLReduceMean.h @@ -51,6 +51,17 @@ public: * @param[out] output Destination tensor. 
Data type supported: Same as @p input */ void configure(ICLTensor *input, const Coordinates &reduction_axis, bool keep_dims, ICLTensor *output); + /** Configure kernel + * + * @note Supported tensor rank: up to 4 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in] reduction_axis Reduction axis vector. + * @param[in] keep_dims If positive, retains reduced dimensions with length 1. + * @param[out] output Destination tensor. Data type supported: Same as @p input + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, const Coordinates &reduction_axis, bool keep_dims, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLReduceMean * diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h index 254c7309fd..25cf655802 100644 --- a/arm_compute/runtime/CL/functions/CLReductionOperation.h +++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h @@ -61,6 +61,16 @@ public: * @param[in] keep_dims (Optional) Whether to keep the reduced dimension after the operation. Defaults to true. */ void configure(ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims = true); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3 + * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX + * @param[in] keep_dims (Optional) Whether to keep the reduced dimension after the operation. Defaults to true. 
+ */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims = true); /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperation. * diff --git a/arm_compute/runtime/CL/functions/CLRemap.h b/arm_compute/runtime/CL/functions/CLRemap.h index f035ac902c..dc8a2c4ecf 100644 --- a/arm_compute/runtime/CL/functions/CLRemap.h +++ b/arm_compute/runtime/CL/functions/CLRemap.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -54,6 +54,20 @@ public: */ void configure(ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialise the function's sources, destination, interpolation policy and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[in] map_x Map for X coords. Data types supported: F32. + * @param[in] map_y Map for Y coords. Data types supported: F32. + * @param[out] output Output tensor. Data types supported: U8. + * @param[in] policy Interpolation policy to use. Only NEAREST and BILINEAR are supported. + * @param[in] border_mode Border mode to use on the input tensor. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ * + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, + InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); }; } #endif /*ARM_COMPUTE_CLREMAP_H */ diff --git a/arm_compute/runtime/CL/functions/CLReorgLayer.h b/arm_compute/runtime/CL/functions/CLReorgLayer.h index dd08c0f1fc..8b245ab441 100644 --- a/arm_compute/runtime/CL/functions/CLReorgLayer.h +++ b/arm_compute/runtime/CL/functions/CLReorgLayer.h @@ -45,6 +45,18 @@ public: * */ void configure(ICLTensor *input, ICLTensor *output, int32_t stride); + /** Initialise the function's source and destination. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Destination tensor with tensor shape: + * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has + * the same number of input elements. Data types supported: same as @p input. + * @param[in] stride Stride value to use for reorganizing the values in the output tensor. + * It defines the spatial distance between 2 consecutive pixels in the x and y direction + * + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, int32_t stride); /** Static function to check if given info will lead to a valid configuration of @ref CLReorgLayer * * @param[in] input Source tensor. Data types supported: All. diff --git a/arm_compute/runtime/CL/functions/CLReshapeLayer.h b/arm_compute/runtime/CL/functions/CLReshapeLayer.h index 63fe5457a3..e91c2c739b 100644 --- a/arm_compute/runtime/CL/functions/CLReshapeLayer.h +++ b/arm_compute/runtime/CL/functions/CLReshapeLayer.h @@ -40,6 +40,13 @@ public: * @param[out] output Output tensor. 
Data type supported: Same as @p input */ void configure(const ICLTensor *input, ICLTensor *output); + /** Initialise the kernel's inputs and outputs + * + * @param[in] compile_context The compile context to be used. + * @param[in] input First tensor input. Data type supported: All + * @param[out] output Output tensor. Data type supported: Same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLReshapeLayer * diff --git a/arm_compute/runtime/CL/functions/CLReverse.h b/arm_compute/runtime/CL/functions/CLReverse.h index f87bd19a90..87ae34c89d 100644 --- a/arm_compute/runtime/CL/functions/CLReverse.h +++ b/arm_compute/runtime/CL/functions/CLReverse.h @@ -41,6 +41,14 @@ public: * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 */ void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *axis); + /** Initialize the function + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: Same as @p input + * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *axis); /** Static function to check if given info will lead to a valid configuration of @ref CLReverseKernel * * @param[in] input Input tensor info. Data types supported: All. 
diff --git a/arm_compute/runtime/CL/functions/CLScale.h b/arm_compute/runtime/CL/functions/CLScale.h index c06c9b629a..f345995138 100644 --- a/arm_compute/runtime/CL/functions/CLScale.h +++ b/arm_compute/runtime/CL/functions/CLScale.h @@ -51,6 +51,21 @@ public: */ void configure(ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value = PixelValue(), SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool use_padding = true, bool align_corners = false); + /** Initialize the function's source, destination, interpolation type and border_mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: Same as @p input + * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] policy The interpolation type. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER + * @param[in] use_padding (Optional) Is padding in use or not. Defaults to true. + * @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false. 
+ */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value = PixelValue(), + SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool use_padding = true, bool align_corners = false); /** Static function to check if given info will lead to a valid configuration of @ref CLScale * diff --git a/arm_compute/runtime/CL/functions/CLScharr3x3.h b/arm_compute/runtime/CL/functions/CLScharr3x3.h index 708243260c..b25b548eaa 100644 --- a/arm_compute/runtime/CL/functions/CLScharr3x3.h +++ b/arm_compute/runtime/CL/functions/CLScharr3x3.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -53,6 +53,18 @@ public: * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialise the function's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be not NULL. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination for the Scharr 3x3 convolution along the X axis. Data types supported: S16. + * @param[out] output_y (optional) Destination for the Scharr 3x3 convolution along the Y axis. Data types supported: S16. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); }; } #endif /*ARM_COMPUTE_CLSCHARR3X3_H */ diff --git a/arm_compute/runtime/CL/functions/CLSelect.h b/arm_compute/runtime/CL/functions/CLSelect.h index a7e06e4eec..84d0997149 100644 --- a/arm_compute/runtime/CL/functions/CLSelect.h +++ b/arm_compute/runtime/CL/functions/CLSelect.h @@ -45,6 +45,15 @@ public: * @param[out] output Output tensor. Data types supported: Same as @p x. */ void configure(const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output); + /** Initialise the kernel's inputs and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] c Condition input tensor. Data types supported: U8. + * @param[in] x First input tensor. Data types supported: All. + * @param[in] y Second input tensor. Data types supported: Same as @p x + * @param[out] output Output tensor. Data types supported: Same as @p x. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLSelect * * @param[in] c Condition input tensor. Data types supported: U8. diff --git a/arm_compute/runtime/CL/functions/CLSlice.h b/arm_compute/runtime/CL/functions/CLSlice.h index f5fca43874..a8c6e1ff0b 100644 --- a/arm_compute/runtime/CL/functions/CLSlice.h +++ b/arm_compute/runtime/CL/functions/CLSlice.h @@ -48,6 +48,20 @@ public: * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). */ void configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends); + /** Configure kernel + * + * @note Supported tensor rank: up to 4 + * @note Start indices must be non-negative. 
0 <= starts[i] + * @note End coordinates can be negative, which represents the number of elements before the end of that dimension. + * @note End indices are not inclusive unless negative. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data type supported: All. + * @param[out] output Destination tensor. Data type supported: Same as @p input + * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends); /** Static function to check if given info will lead to a valid configuration of @ref CLSlice * diff --git a/arm_compute/runtime/CL/functions/CLSobel3x3.h b/arm_compute/runtime/CL/functions/CLSobel3x3.h index 2f4cf50465..24bc0cda43 100644 --- a/arm_compute/runtime/CL/functions/CLSobel3x3.h +++ b/arm_compute/runtime/CL/functions/CLSobel3x3.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -53,6 +53,18 @@ public: * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialise the function's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be not NULL. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination for the Sobel 3x3 convolution along the X axis. Data types supported: S16. 
+ * @param[out] output_y (optional) Destination for the Sobel 3x3 convolution along the Y axis. Data types supported: S16. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); }; } #endif /*ARM_COMPUTE_CLSOBEL3X3_H */ diff --git a/arm_compute/runtime/CL/functions/CLSobel5x5.h b/arm_compute/runtime/CL/functions/CLSobel5x5.h index 2a9136b92e..bf266270c3 100644 --- a/arm_compute/runtime/CL/functions/CLSobel5x5.h +++ b/arm_compute/runtime/CL/functions/CLSobel5x5.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -65,6 +65,18 @@ public: * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialise the function's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be not NULL. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination for the Sobel 5x5 convolution along the X axis. Data types supported: S16. + * @param[out] output_y (optional) Destination for the Sobel 5x5 convolution along the Y axis. Data types supported: S16. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/CL/functions/CLSobel7x7.h b/arm_compute/runtime/CL/functions/CLSobel7x7.h index e3188b85f5..13932c704a 100644 --- a/arm_compute/runtime/CL/functions/CLSobel7x7.h +++ b/arm_compute/runtime/CL/functions/CLSobel7x7.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -65,6 +65,18 @@ public: * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialise the function's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be not NULL. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination for the Sobel 7x7 convolution along the X axis. Data types supported: S32. + * @param[out] output_y (optional) Destination for the Sobel 7x7 convolution along the Y axis. Data types supported: S32. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h index 751b68d0cf..fadbc430e6 100644 --- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h +++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -67,6 +67,17 @@ public: * when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image. */ void configure(const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t axis = 1); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 + * @param[out] output Destination tensor. Data types supported: same as @p input + * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f + * @param[in] axis (Optional) Reduction axis. It has the purpose of squashing the first @p axis + * dimensions together. For instance, given a [4x4x4x4] image, + * when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t axis = 1); /** Static function to check if given info will lead to a valid configuration of @ref CLSoftmaxLayer * * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 @@ -97,6 +108,21 @@ private: * when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image. 
*/ void configure_reshape_input_kernel(const ICLTensor *input, const ICLTensor *output, size_t axis); + /** Utility method to configure the kernels needed to flatten the input + * tensor. + * + * @note This function changes the internal state of this class. In particular, + * it initializes the kernel @p _flatten_kernel and the tensors @p _input_flat and + * @p _output_flat + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Original source tensor. + * @param[in] output Original destination tensor. + * @param[in] axis (Optional) Reduction axis. It has the purpose of squashing the first @p axis + * dimensions together. For instance, given a [4x4x4x4] image, + * when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image. + */ + void configure_reshape_input_kernel(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *output, size_t axis); MemoryGroup _memory_group; CLLogits1DMaxShiftExpSumKernel _max_shift_exp_sum_kernel; diff --git a/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h b/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h index ef9f164112..b8e2bdc4c6 100644 --- a/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h +++ b/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h @@ -63,6 +63,15 @@ public: * @param[out] output Tensor output. Data types supported: same as @p input */ void configure(const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 + * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 + * @param[out] output Tensor output. 
Data types supported: same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output); /** Set the input and output tensors. (Static block shape and paddings) * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. @@ -73,6 +82,18 @@ public: * @param[out] output Tensor output. Data types supported: same as @p input */ void configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output); + /** Set the input and output tensors. (Static block shape and paddings) + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. + * @param[in] padding_left The left padding of the output tensor. + * @param[in] padding_right The right padding of the output tensor. + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, + ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayer * * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. diff --git a/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h b/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h index be7937d0e6..ac011dd998 100644 --- a/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h +++ b/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h @@ -46,6 +46,14 @@ public: * @param[in] block_shape Block shape value. 
*/ void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[out] output Tensor output. Data types supported: same as @p input + * @param[in] block_shape Block shape value. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape); /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToDepthLayer. * * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. diff --git a/arm_compute/runtime/CL/functions/CLStackLayer.h b/arm_compute/runtime/CL/functions/CLStackLayer.h index ebce4f34d0..9b204458c3 100644 --- a/arm_compute/runtime/CL/functions/CLStackLayer.h +++ b/arm_compute/runtime/CL/functions/CLStackLayer.h @@ -56,6 +56,17 @@ public: * @param[out] output Output tensor. Data types supported: Same as @p input. */ void configure(const std::vector &input, int axis, ICLTensor *output); + /** Initialise the kernel's inputs vector and output. + * + * @note Supported input tensor rank: up to 4 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The vectors containing all the tensors with the same shape to stack. Data types supported: All. + * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. + * Negative values wrap around + * @param[out] output Output tensor. Data types supported: Same as @p input. 
+ */ + void configure(const CLCompileContext &compile_context, const std::vector &input, int axis, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLStackLayerKernel * * @note Supported input tensor rank: up to 4 diff --git a/arm_compute/runtime/CL/functions/CLStridedSlice.h b/arm_compute/runtime/CL/functions/CLStridedSlice.h index 6bde2c0af4..bb2bc962d6 100644 --- a/arm_compute/runtime/CL/functions/CLStridedSlice.h +++ b/arm_compute/runtime/CL/functions/CLStridedSlice.h @@ -52,6 +52,24 @@ public: void configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0); + /** Configure kernel + * + * @note Supported tensor rank: up to 4 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data type supported: All. + * @param[out] output Destination tensor. Data type supported: Same as @p input + * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] begin_mask (Optional) If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] end_mask (Optional) If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. + * A slice of size 1 starting from starts[i] in the dimension must be preserved. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, + const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, + int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0); /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSlice * diff --git a/arm_compute/runtime/CL/functions/CLTableLookup.h b/arm_compute/runtime/CL/functions/CLTableLookup.h index c1b7b943a0..1c11f076a3 100644 --- a/arm_compute/runtime/CL/functions/CLTableLookup.h +++ b/arm_compute/runtime/CL/functions/CLTableLookup.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -42,6 +42,14 @@ public: * @param[out] output Output tensor. Data types supported: U8 and S16 */ void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output); + /** Initialise the kernel's inputs and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input First tensor input. Data types supported: U8 and S16 + * @param[in] lut Input lookup table. Data types supported: U8 and S16 + * @param[out] output Output tensor. Data types supported: U8 and S16 + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLLut *lut, ICLTensor *output); }; } #endif /*ARM_COMPUTE_CLTABLELOOKUP_H */ diff --git a/arm_compute/runtime/CL/functions/CLThreshold.h b/arm_compute/runtime/CL/functions/CLThreshold.h index a19b320b3e..d8ae6fbb34 100644 --- a/arm_compute/runtime/CL/functions/CLThreshold.h +++ b/arm_compute/runtime/CL/functions/CLThreshold.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -50,6 +50,20 @@ public: void configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold, uint8_t false_value = 0, uint8_t true_value = 0, ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0); + /** Initialise the function's source, destination, thresholds and threshold type + * + * @param[in] compile_context The compile context to be used. + * @param[in] input First tensor input. Data types supported: U8. + * @param[out] output Output tensor. Data types supported: U8. + * @param[in] threshold Threshold. If upper threshold is specified, this will be used as the lower threshold. + * @param[in] false_value Value to assign when the condition is false. + * @param[in] true_value value to assign when the condition is true. + * @param[in] type Thresholding type. Can either be BINARY or RANGE. + * @param[in] upper Upper threshold. Only used with RANGE thresholding + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, uint8_t threshold, + uint8_t false_value = 0, uint8_t true_value = 0, + ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0); }; } #endif /*ARM_COMPUTE_CLTHRESHOLD_H */ diff --git a/arm_compute/runtime/CL/functions/CLTile.h b/arm_compute/runtime/CL/functions/CLTile.h index 9c83b0cace..0dad9ad89d 100644 --- a/arm_compute/runtime/CL/functions/CLTile.h +++ b/arm_compute/runtime/CL/functions/CLTile.h @@ -43,6 +43,14 @@ public: * @param[out] output Destination tensor. Same as @p input */ void configure(const ICLTensor *input, ICLTensor *output, const Multiples &multiples); + /** Set the source, destination of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data type supported: All. + * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. + * @param[out] output Destination tensor. 
Same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Multiples &multiples); /** Static function to check if given info will lead to a valid configuration of @ref CLTile * * @param[in] input Source tensor info. Data type supported: All. diff --git a/arm_compute/runtime/CL/functions/CLTranspose.h b/arm_compute/runtime/CL/functions/CLTranspose.h index 61092a1914..b2fdcda5c4 100644 --- a/arm_compute/runtime/CL/functions/CLTranspose.h +++ b/arm_compute/runtime/CL/functions/CLTranspose.h @@ -44,6 +44,13 @@ public: * @param[out] output Output tensor. Data type supported: Same as @p input */ void configure(const ICLTensor *input, ICLTensor *output); + /** Initialise the kernel's inputs and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: Same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLTranspose * * @param[in] input The input tensor. Data types supported: All. diff --git a/arm_compute/runtime/CL/functions/CLUnstack.h b/arm_compute/runtime/CL/functions/CLUnstack.h index 814d07384c..777da692be 100644 --- a/arm_compute/runtime/CL/functions/CLUnstack.h +++ b/arm_compute/runtime/CL/functions/CLUnstack.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -55,6 +55,16 @@ public: * */ void configure(const ICLTensor *input, const std::vector &output_vector, int axis); + /** Set the input, output and unstacking axis. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input A tensor to be unstacked. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. 
+ * @param[in,out] output_vector A vector of tensors. Data types supported: Same as @p input. + * Note: The number of elements of the vector will be used as the number of slices to be taken from the axis. + * @param[in] axis The axis to unstack along. Valid values are [-R,R) where R is the input's rank. Negative values wrap around. + * + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const std::vector &output_vector, int axis); /** Static function to check if given info will lead to a valid configuration of @ref CLUnstack * * @param[in] input Input tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 diff --git a/arm_compute/runtime/CL/functions/CLUpsampleLayer.h b/arm_compute/runtime/CL/functions/CLUpsampleLayer.h index 1695fd7d2b..5f4f57f824 100644 --- a/arm_compute/runtime/CL/functions/CLUpsampleLayer.h +++ b/arm_compute/runtime/CL/functions/CLUpsampleLayer.h @@ -60,6 +60,16 @@ public: */ void configure(ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy); + /** Initialize the function's source, destination, interpolation type and border_mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] info Contains stride information described in @ref Size2D. + * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, + const Size2D &info, const InterpolationPolicy upsampling_policy); /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample * * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. 
diff --git a/arm_compute/runtime/CL/functions/CLWarpAffine.h b/arm_compute/runtime/CL/functions/CLWarpAffine.h index 2de7107f13..1a2fe9d4d5 100644 --- a/arm_compute/runtime/CL/functions/CLWarpAffine.h +++ b/arm_compute/runtime/CL/functions/CLWarpAffine.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -48,6 +48,19 @@ public: * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ void configure(ICLTensor *input, ICLTensor *output, const std::array &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialize the function's source, destination, interpolation policy and border_mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] matrix The affine matrix. Must be 2x3 of type float. + * The matrix argument requires 9 values, the last 3 values are ignored. + * @param[in] policy The interpolation type. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const std::array &matrix, InterpolationPolicy policy, BorderMode border_mode, + uint8_t constant_border_value = 0); }; } #endif /*ARM_COMPUTE_CLWARPAFFINE_H */ diff --git a/arm_compute/runtime/CL/functions/CLWarpPerspective.h b/arm_compute/runtime/CL/functions/CLWarpPerspective.h index 93fcc85a95..5db9ec4cf0 100644 --- a/arm_compute/runtime/CL/functions/CLWarpPerspective.h +++ b/arm_compute/runtime/CL/functions/CLWarpPerspective.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -47,6 +47,18 @@ public: * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ void configure(ICLTensor *input, ICLTensor *output, const std::array &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); + /** Initialize the function's source, destination, interpolation policy and border_mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] matrix The perspective matrix. Must be 3x3 of type float. + * @param[in] policy The interpolation type. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const std::array &matrix, InterpolationPolicy policy, BorderMode border_mode, + uint8_t constant_border_value = 0); }; } #endif /*ARM_COMPUTE_CLWARPPERSPECTIVE_H */ diff --git a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h index 7ac59c900c..c1de5f15ce 100644 --- a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -75,6 +75,26 @@ public: */ void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false); + /** Set the input and output tensors. + * + * @note: This function only works with 3x3,3x1,1x3,5x5,5x1,1x5,7x1 and 1x7 kernels along with unit strides for both NCHW and NHWC data layout + * @note Some Winograd configurations (i.e. F(4x4, 5x5)) are supported only with enable_fast_math = true + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: F16/F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. 
+ * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation + * available which may introduce a drop of accuracy as well. Default is false + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false); /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradConvolutionLayer * * @note: This function only works with 3x3,3x1,1x3,5x5,5x1 and 1x5 kernels along with unit strides for both NCHW and NHWC data layout diff --git a/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h b/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h index e1ab928cf2..11a402e51d 100644 --- a/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h +++ b/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -55,6 +55,25 @@ public: * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo. */ void configure(ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info); + /** Set the input and output tensors. 
+ * + * @note Winograd input transform supports the following configurations for NCWH data layout + * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), + * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * @note Winograd input transform supports the following configurations for NHWC data layout + * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * Strides: only unit strides + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to transform. Data types supported: F16,F32 + * @param[in] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input + * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info); /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradInputTransform. * * @note Winograd input transform supports the following configurations for NCWH data layout diff --git a/arm_compute/runtime/CL/functions/CLYOLOLayer.h b/arm_compute/runtime/CL/functions/CLYOLOLayer.h index 95c684b2c3..e70d84b97e 100644 --- a/arm_compute/runtime/CL/functions/CLYOLOLayer.h +++ b/arm_compute/runtime/CL/functions/CLYOLOLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -53,6 +53,18 @@ public: * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) */ void configure(ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes); + /** Set the input and output tensor. 
+ * + * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] act_info Activation layer parameters. + * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes); /** Static function to check if given info will lead to a valid configuration of @ref CLYOLOLayer * * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result -- cgit v1.2.1