From 4c6bd514a8d424a29b776754f1b3426fa3a8c339 Mon Sep 17 00:00:00 2001 From: Manuel Bottini Date: Wed, 8 Apr 2020 10:15:51 +0100 Subject: COMPMID-3280: Make all ML primitives for CL use the new interface - Part 1 - Only CLKernels have been updated Change-Id: Ife55b847c2e39e712a186eb6ca452503d5b66937 Signed-off-by: Manuel Bottini Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3001 Reviewed-by: Michele Di Giorgio Reviewed-by: Michalis Spyrou Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- arm_compute/core/CL/CLHelpers.h | 2 +- .../core/CL/kernels/CLAbsoluteDifferenceKernel.h | 10 +++- arm_compute/core/CL/kernels/CLAccumulateKernel.h | 25 +++++++++- .../core/CL/kernels/CLActivationLayerKernel.h | 15 ++++-- .../core/CL/kernels/CLArgMinMaxLayerKernel.h | 14 +++++- .../CL/kernels/CLBatchConcatenateLayerKernel.h | 12 +++++ .../CL/kernels/CLBatchNormalizationLayerKernel.h | 21 +++++++- .../core/CL/kernels/CLBatchToSpaceLayerKernel.h | 17 +++++++ arm_compute/core/CL/kernels/CLBitwiseAndKernel.h | 10 +++- arm_compute/core/CL/kernels/CLBitwiseNotKernel.h | 9 +++- arm_compute/core/CL/kernels/CLBitwiseOrKernel.h | 10 +++- arm_compute/core/CL/kernels/CLBitwiseXorKernel.h | 10 +++- .../core/CL/kernels/CLBoundingBoxTransformKernel.h | 15 +++++- arm_compute/core/CL/kernels/CLBox3x3Kernel.h | 10 +++- arm_compute/core/CL/kernels/CLCannyEdgeKernel.h | 42 +++++++++++++++- .../core/CL/kernels/CLChannelCombineKernel.h | 21 +++++++- .../core/CL/kernels/CLChannelExtractKernel.h | 18 ++++++- .../core/CL/kernels/CLChannelShuffleLayerKernel.h | 8 ++++ arm_compute/core/CL/kernels/CLCol2ImKernel.h | 10 ++++ arm_compute/core/CL/kernels/CLColorConvertKernel.h | 32 ++++++++++++- arm_compute/core/CL/kernels/CLComparisonKernel.h | 9 ++++ .../kernels/CLConvertFullyConnectedWeightsKernel.h | 9 ++++ arm_compute/core/CL/kernels/CLConvolutionKernel.h | 44 ++++++++++++++++- arm_compute/core/CL/kernels/CLCopyKernel.h | 11 ++++- arm_compute/core/CL/kernels/CLCropKernel.h | 17 ++++++- .../kernels/CLDeconvolutionLayerUpsampleKernel.h | 8 ++++ .../kernels/CLDeconvolutionReshapeOutputKernel.h | 13 +++++ .../CL/kernels/CLDepthConcatenateLayerKernel.h | 12 +++++ .../core/CL/kernels/CLDepthConvertLayerKernel.h | 22 ++++++++- .../core/CL/kernels/CLDepthToSpaceLayerKernel.h | 8 ++++ .../CLDepthwiseConvolutionLayer3x3NCHWKernel.h | 23 ++++++++- .../CLDepthwiseConvolutionLayer3x3NHWCKernel.h | 23 ++++++++- .../CLDepthwiseConvolutionLayerNativeKernel.h | 22 +++++++++ ...DepthwiseConvolutionLayerReshapeWeightsKernel.h | 8 ++++ .../core/CL/kernels/CLDequantizationLayerKernel.h | 7 +++ arm_compute/core/CL/kernels/CLDerivativeKernel.h | 13 ++++- arm_compute/core/CL/kernels/CLDilateKernel.h | 10 +++- .../CL/kernels/CLDirectConvolutionLayerKernel.h | 21 ++++++++ .../CL/kernels/CLElementWiseUnaryLayerKernel.h | 10 +++- .../core/CL/kernels/CLElementwiseOperationKernel.h | 27 +++++++++++ arm_compute/core/CL/kernels/CLErodeKernel.h | 10 +++- .../core/CL/kernels/CLFFTDigitReverseKernel.h | 11 ++++- .../core/CL/kernels/CLFFTRadixStageKernel.h | 12 ++++- arm_compute/core/CL/kernels/CLFFTScaleKernel.h | 10 +++- arm_compute/core/CL/kernels/CLFastCornersKernel.h | 21 +++++++- arm_compute/core/CL/kernels/CLFillBorderKernel.h | 9 ++++ arm_compute/core/CL/kernels/CLFlattenLayerKernel.h | 9 ++++ .../CL/kernels/CLFuseBatchNormalizationKernel.h | 21 +++++++- .../CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h | 13 ++++- .../kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h | 16 +++++++ .../CLGEMMLowpMatrixMultiplyReshapedKernel.h | 20 ++++++++ 
...CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h | 27 +++++++++++ .../kernels/CLGEMMLowpOffsetContributionKernel.h | 18 ++++++- ...CLGEMMLowpOffsetContributionOutputStageKernel.h | 23 +++++++++ ...CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h | 10 ++++ .../CLGEMMLowpQuantizeDownInt32ScaleKernel.h | 10 ++++ ...antizeDownInt32ToInt16ScaleByFixedPointKernel.h | 16 ++++++- ...uantizeDownInt32ToInt8ScaleByFixedPointKernel.h | 18 ++++++- ...antizeDownInt32ToUint8ScaleByFixedPointKernel.h | 18 ++++++- .../core/CL/kernels/CLGEMMLowpReductionKernel.h | 36 ++++++++++++++ .../kernels/CLGEMMMatrixAccumulateBiasesKernel.h | 9 +++- .../core/CL/kernels/CLGEMMMatrixMultiplyKernel.h | 19 +++++++- .../CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h | 23 ++++++++- .../kernels/CLGEMMMatrixMultiplyReshapedKernel.h | 31 +++++++++++- .../CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h | 23 ++++++++- .../CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h | 8 ++++ .../core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h | 15 ++++++ .../core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h | 16 ++++++- arm_compute/core/CL/kernels/CLGatherKernel.h | 9 ++++ arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h | 10 +++- arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h | 18 ++++++- .../core/CL/kernels/CLGaussianPyramidKernel.h | 16 ++++++- .../CL/kernels/CLGenerateProposalsLayerKernel.h | 11 ++++- .../core/CL/kernels/CLHOGDescriptorKernel.h | 19 +++++++- arm_compute/core/CL/kernels/CLHOGDetectorKernel.h | 17 ++++++- .../core/CL/kernels/CLHarrisCornersKernel.h | 17 ++++++- .../CL/kernels/CLHeightConcatenateLayerKernel.h | 9 ++++ arm_compute/core/CL/kernels/CLHistogramKernel.h | 16 ++++++- arm_compute/core/CL/kernels/CLIm2ColKernel.h | 16 +++++++ .../kernels/CLInstanceNormalizationLayerKernel.h | 9 ++++ .../core/CL/kernels/CLIntegralImageKernel.h | 15 +++++- .../core/CL/kernels/CLL2NormalizeLayerKernel.h | 14 +++++- arm_compute/core/CL/kernels/CLLKTrackerKernel.h | 56 +++++++++++++++++++++- .../CLLocallyConnectedMatrixMultiplyKernel.h | 10 +++- .../core/CL/kernels/CLMagnitudePhaseKernel.h | 16 ++++++- arm_compute/core/CL/kernels/CLMeanStdDevKernel.h | 12 ++++- .../CL/kernels/CLMeanStdDevNormalizationKernel.h | 13 ++++- arm_compute/core/CL/kernels/CLMedian3x3Kernel.h | 10 +++- arm_compute/core/CL/kernels/CLMemsetKernel.h | 8 ++++ arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h | 10 +++- .../core/CL/kernels/CLMinMaxLocationKernel.h | 22 ++++++++- .../core/CL/kernels/CLNonLinearFilterKernel.h | 16 ++++++- .../CL/kernels/CLNonMaximaSuppression3x3Kernel.h | 10 +++- .../core/CL/kernels/CLNormalizationLayerKernel.h | 12 ++++- .../CL/kernels/CLNormalizePlanarYUVLayerKernel.h | 11 +++++ arm_compute/core/CL/kernels/CLPadLayerKernel.h | 13 +++++ arm_compute/core/CL/kernels/CLPermuteKernel.h | 10 ++++ .../CL/kernels/CLPixelWiseMultiplicationKernel.h | 23 +++++++++ arm_compute/core/CL/kernels/CLPoolingLayerKernel.h | 10 ++++ .../core/CL/kernels/CLPriorBoxLayerKernel.h | 15 +++++- .../core/CL/kernels/CLQuantizationLayerKernel.h | 9 ++++ .../core/CL/kernels/CLROIAlignLayerKernel.h | 16 +++++++ .../core/CL/kernels/CLROIPoolingLayerKernel.h | 17 ++++++- arm_compute/core/CL/kernels/CLRangeKernel.h | 11 ++++- .../core/CL/kernels/CLReductionOperationKernel.h | 11 +++++ arm_compute/core/CL/kernels/CLRemapKernel.h | 13 ++++- arm_compute/core/CL/kernels/CLReorgLayerKernel.h | 11 +++++ arm_compute/core/CL/kernels/CLReshapeLayerKernel.h | 7 +++ arm_compute/core/CL/kernels/CLReverseKernel.h | 8 ++++ arm_compute/core/CL/kernels/CLScaleKernel.h | 13 +++++ 
arm_compute/core/CL/kernels/CLScharr3x3Kernel.h | 13 ++++- arm_compute/core/CL/kernels/CLSelectKernel.h | 9 ++++ arm_compute/core/CL/kernels/CLSobel3x3Kernel.h | 13 ++++- arm_compute/core/CL/kernels/CLSobel5x5Kernel.h | 25 +++++++++- arm_compute/core/CL/kernels/CLSobel7x7Kernel.h | 25 +++++++++- arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h | 38 ++++++++++++++- .../core/CL/kernels/CLSpaceToBatchLayerKernel.h | 20 ++++++++ .../core/CL/kernels/CLSpaceToDepthLayerKernel.h | 8 ++++ arm_compute/core/CL/kernels/CLStackLayerKernel.h | 14 ++++++ arm_compute/core/CL/kernels/CLStridedSliceKernel.h | 18 +++++++ arm_compute/core/CL/kernels/CLTableLookupKernel.h | 10 +++- arm_compute/core/CL/kernels/CLThresholdKernel.h | 15 +++++- arm_compute/core/CL/kernels/CLTileKernel.h | 10 ++++ arm_compute/core/CL/kernels/CLTransposeKernel.h | 7 +++ .../core/CL/kernels/CLUpsampleLayerKernel.h | 9 ++++ arm_compute/core/CL/kernels/CLWarpAffineKernel.h | 12 ++++- .../core/CL/kernels/CLWarpPerspectiveKernel.h | 11 ++++- .../core/CL/kernels/CLWeightsReshapeKernel.h | 16 ++++++- .../CL/kernels/CLWidthConcatenate2TensorsKernel.h | 8 ++++ .../CL/kernels/CLWidthConcatenate4TensorsKernel.h | 10 ++++ .../CL/kernels/CLWidthConcatenateLayerKernel.h | 9 ++++ .../CL/kernels/CLWinogradFilterTransformKernel.h | 21 +++++++- .../CL/kernels/CLWinogradInputTransformKernel.h | 21 +++++++- .../CL/kernels/CLWinogradOutputTransformKernel.h | 24 +++++++++- arm_compute/core/CL/kernels/CLYOLOLayerKernel.h | 14 +++++- .../ICLDepthwiseConvolutionLayer3x3Kernel.h | 23 ++++++++- src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp | 9 +++- src/core/CL/kernels/CLAccumulateKernel.cpp | 23 +++++++-- src/core/CL/kernels/CLActivationLayerKernel.cpp | 12 +++-- src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp | 7 ++- .../CL/kernels/CLBatchConcatenateLayerKernel.cpp | 7 ++- .../CL/kernels/CLBatchNormalizationLayerKernel.cpp | 9 +++- src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp | 14 +++++- src/core/CL/kernels/CLBitwiseAndKernel.cpp | 9 +++- src/core/CL/kernels/CLBitwiseNotKernel.cpp | 9 +++- src/core/CL/kernels/CLBitwiseOrKernel.cpp | 9 +++- src/core/CL/kernels/CLBitwiseXorKernel.cpp | 9 +++- .../CL/kernels/CLBoundingBoxTransformKernel.cpp | 7 ++- src/core/CL/kernels/CLBox3x3Kernel.cpp | 9 +++- src/core/CL/kernels/CLCannyEdgeKernel.cpp | 22 +++++++-- src/core/CL/kernels/CLChannelCombineKernel.cpp | 16 +++++-- src/core/CL/kernels/CLChannelExtractKernel.cpp | 16 +++++-- .../CL/kernels/CLChannelShuffleLayerKernel.cpp | 7 ++- src/core/CL/kernels/CLCol2ImKernel.cpp | 7 ++- src/core/CL/kernels/CLColorConvertKernel.cpp | 28 +++++++++-- src/core/CL/kernels/CLComparisonKernel.cpp | 7 ++- .../CLConvertFullyConnectedWeightsKernel.cpp | 8 +++- src/core/CL/kernels/CLConvolutionKernel.cpp | 33 +++++++++++-- src/core/CL/kernels/CLCopyKernel.cpp | 9 +++- src/core/CL/kernels/CLCropKernel.cpp | 8 +++- .../kernels/CLDeconvolutionLayerUpsampleKernel.cpp | 8 +++- .../kernels/CLDeconvolutionReshapeOutputKernel.cpp | 9 +++- .../CL/kernels/CLDepthConcatenateLayerKernel.cpp | 7 ++- src/core/CL/kernels/CLDepthConvertLayerKernel.cpp | 9 +++- src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp | 7 ++- .../CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp | 9 +++- .../CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp | 9 +++- .../CLDepthwiseConvolutionLayerNativeKernel.cpp | 10 +++- ...pthwiseConvolutionLayerReshapeWeightsKernel.cpp | 7 ++- .../CL/kernels/CLDequantizationLayerKernel.cpp | 7 ++- src/core/CL/kernels/CLDerivativeKernel.cpp | 7 ++- src/core/CL/kernels/CLDilateKernel.cpp | 9 
+++- .../CL/kernels/CLDirectConvolutionLayerKernel.cpp | 12 +++-- .../CL/kernels/CLElementWiseUnaryLayerKernel.cpp | 7 ++- .../CL/kernels/CLElementwiseOperationKernel.cpp | 24 ++++++++-- src/core/CL/kernels/CLErodeKernel.cpp | 9 +++- src/core/CL/kernels/CLFFTDigitReverseKernel.cpp | 7 ++- src/core/CL/kernels/CLFFTRadixStageKernel.cpp | 7 ++- src/core/CL/kernels/CLFFTScaleKernel.cpp | 7 ++- src/core/CL/kernels/CLFastCornersKernel.cpp | 14 +++++- src/core/CL/kernels/CLFillBorderKernel.cpp | 7 ++- src/core/CL/kernels/CLFlattenLayerKernel.cpp | 7 ++- .../CL/kernels/CLFuseBatchNormalizationKernel.cpp | 10 +++- .../CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp | 7 ++- .../CLGEMMLowpMatrixMultiplyNativeKernel.cpp | 9 +++- .../CLGEMMLowpMatrixMultiplyReshapedKernel.cpp | 9 +++- ...GEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp | 9 +++- .../kernels/CLGEMMLowpOffsetContributionKernel.cpp | 9 +++- ...GEMMLowpOffsetContributionOutputStageKernel.cpp | 10 +++- ...GEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp | 8 +++- .../CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp | 7 ++- ...tizeDownInt32ToInt16ScaleByFixedPointKernel.cpp | 9 +++- ...ntizeDownInt32ToInt8ScaleByFixedPointKernel.cpp | 9 +++- ...tizeDownInt32ToUint8ScaleByFixedPointKernel.cpp | 9 +++- src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp | 14 +++++- .../kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp | 7 ++- src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp | 8 +++- .../kernels/CLGEMMMatrixMultiplyNativeKernel.cpp | 10 +++- .../kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp | 10 +++- .../CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp | 10 +++- .../kernels/CLGEMMMatrixVectorMultiplyKernel.cpp | 7 ++- .../CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp | 7 ++- .../CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp | 7 ++- src/core/CL/kernels/CLGatherKernel.cpp | 7 ++- src/core/CL/kernels/CLGaussian3x3Kernel.cpp | 9 +++- src/core/CL/kernels/CLGaussian5x5Kernel.cpp | 16 +++++-- src/core/CL/kernels/CLGaussianPyramidKernel.cpp | 14 +++++- .../CL/kernels/CLGenerateProposalsLayerKernel.cpp | 7 ++- src/core/CL/kernels/CLHOGDescriptorKernel.cpp | 14 +++++- src/core/CL/kernels/CLHOGDetectorKernel.cpp | 9 +++- src/core/CL/kernels/CLHarrisCornersKernel.cpp | 9 +++- .../CL/kernels/CLHeightConcatenateLayerKernel.cpp | 7 ++- src/core/CL/kernels/CLHistogramKernel.cpp | 14 +++++- src/core/CL/kernels/CLIm2ColKernel.cpp | 9 +++- .../kernels/CLInstanceNormalizationLayerKernel.cpp | 7 ++- src/core/CL/kernels/CLIntegralImageKernel.cpp | 14 +++++- src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp | 7 ++- src/core/CL/kernels/CLLKTrackerKernel.cpp | 37 ++++++++++++-- .../CLLocallyConnectedMatrixMultiplyKernel.cpp | 9 +++- src/core/CL/kernels/CLMagnitudePhaseKernel.cpp | 8 +++- src/core/CL/kernels/CLMeanStdDevKernel.cpp | 9 +++- .../CL/kernels/CLMeanStdDevNormalizationKernel.cpp | 7 ++- src/core/CL/kernels/CLMedian3x3Kernel.cpp | 7 ++- src/core/CL/kernels/CLMemsetKernel.cpp | 9 +++- src/core/CL/kernels/CLMinMaxLayerKernel.cpp | 7 ++- src/core/CL/kernels/CLMinMaxLocationKernel.cpp | 15 +++++- src/core/CL/kernels/CLNonLinearFilterKernel.cpp | 11 ++++- .../CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp | 9 +++- src/core/CL/kernels/CLNormalizationLayerKernel.cpp | 7 ++- .../CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp | 7 ++- src/core/CL/kernels/CLPadLayerKernel.cpp | 7 ++- src/core/CL/kernels/CLPermuteKernel.cpp | 7 ++- .../CL/kernels/CLPixelWiseMultiplicationKernel.cpp | 15 +++++- src/core/CL/kernels/CLPoolingLayerKernel.cpp | 11 +++-- src/core/CL/kernels/CLPriorBoxLayerKernel.cpp 
| 8 +++- src/core/CL/kernels/CLQuantizationLayerKernel.cpp | 7 ++- src/core/CL/kernels/CLROIAlignLayerKernel.cpp | 7 ++- src/core/CL/kernels/CLROIPoolingLayerKernel.cpp | 7 ++- src/core/CL/kernels/CLRangeKernel.cpp | 7 ++- src/core/CL/kernels/CLReductionOperationKernel.cpp | 7 ++- src/core/CL/kernels/CLRemapKernel.cpp | 10 +++- src/core/CL/kernels/CLReorgLayerKernel.cpp | 7 ++- src/core/CL/kernels/CLReshapeLayerKernel.cpp | 9 +++- src/core/CL/kernels/CLReverseKernel.cpp | 7 ++- src/core/CL/kernels/CLScaleKernel.cpp | 8 +++- src/core/CL/kernels/CLScharr3x3Kernel.cpp | 9 +++- src/core/CL/kernels/CLSelectKernel.cpp | 7 ++- src/core/CL/kernels/CLSobel3x3Kernel.cpp | 7 ++- src/core/CL/kernels/CLSobel5x5Kernel.cpp | 14 +++++- src/core/CL/kernels/CLSobel7x7Kernel.cpp | 14 +++++- src/core/CL/kernels/CLSoftmaxLayerKernel.cpp | 14 +++++- src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp | 16 ++++++- src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp | 7 ++- src/core/CL/kernels/CLStackLayerKernel.cpp | 7 ++- src/core/CL/kernels/CLStridedSliceKernel.cpp | 9 +++- src/core/CL/kernels/CLTableLookupKernel.cpp | 9 +++- src/core/CL/kernels/CLThresholdKernel.cpp | 10 +++- src/core/CL/kernels/CLTileKernel.cpp | 7 ++- src/core/CL/kernels/CLTransposeKernel.cpp | 7 ++- src/core/CL/kernels/CLUpsampleLayerKernel.cpp | 7 ++- src/core/CL/kernels/CLWarpAffineKernel.cpp | 7 ++- src/core/CL/kernels/CLWarpPerspectiveKernel.cpp | 9 +++- src/core/CL/kernels/CLWeightsReshapeKernel.cpp | 7 ++- .../kernels/CLWidthConcatenate2TensorsKernel.cpp | 7 ++- .../kernels/CLWidthConcatenate4TensorsKernel.cpp | 8 +++- .../CL/kernels/CLWidthConcatenateLayerKernel.cpp | 7 ++- .../CL/kernels/CLWinogradFilterTransformKernel.cpp | 7 ++- .../CL/kernels/CLWinogradInputTransformKernel.cpp | 7 ++- .../CL/kernels/CLWinogradOutputTransformKernel.cpp | 8 +++- src/core/CL/kernels/CLYOLOLayerKernel.cpp | 7 ++- src/runtime/CL/functions/CLActivationLayer.cpp | 4 +- 271 files changed, 3117 insertions(+), 284 deletions(-) diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h index ee6397af7a..77c17c7d9c 100644 --- a/arm_compute/core/CL/CLHelpers.h +++ b/arm_compute/core/CL/CLHelpers.h @@ -206,7 +206,7 @@ cl::Kernel create_opencl_kernel(CLCoreRuntimeContext *ctx, const std::string &ke * * @return An opencl kernel */ -cl::Kernel create_kernel(CLCompileContext &ctx, const std::string &kernel_name, const std::set &build_opts); +cl::Kernel create_kernel(CLCompileContext &ctx, const std::string &kernel_name, const std::set &build_opts = std::set()); /** Creates a suitable LWS hint object for parallel implementations. Sets the number of WG based on the input size. * If input width is smaller than 128 we can use fewer threads than 8. diff --git a/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h b/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h index 24993e2fda..18896725e2 100644 --- a/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h +++ b/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -58,6 +58,14 @@ public: * @param[out] output Destination tensor. Data types supported: U8/S16. */ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + /** Set the inputs and output images. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Source tensor. Data types supported: U8/S16. 
+ * @param[in] input2 Source tensor. Data types supported: U8/S16. + * @param[out] output Destination tensor. Data types supported: U8/S16. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLAccumulateKernel.h b/arm_compute/core/CL/kernels/CLAccumulateKernel.h index 84f3f2c41a..d7cb09fdd3 100644 --- a/arm_compute/core/CL/kernels/CLAccumulateKernel.h +++ b/arm_compute/core/CL/kernels/CLAccumulateKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -46,6 +46,13 @@ public: * @param[out] accum Destination tensor. Data types supported: S16. */ void configure(const ICLTensor *input, ICLTensor *accum); + /** Set the input and accumulation tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] accum Destination tensor. Data types supported: S16. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *accum); }; /** Interface for the accumulate weighted kernel. @@ -67,6 +74,14 @@ public: * @param[in,out] accum Accumulated tensor. Data types supported: U8. */ void configure(const ICLTensor *input, float alpha, ICLTensor *accum); + /** Set the input and accumulation images, and the scale value. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32. + * @param[in,out] accum Accumulated tensor. Data types supported: U8. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, float alpha, ICLTensor *accum); }; /** Interface for the accumulate squared kernel. @@ -86,6 +101,14 @@ public: * @param[in,out] accum Accumulated tensor. Data types supported: S16. */ void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum); + /** Set the input and accumulation tensors and the shift value. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32. + * @param[in,out] accum Accumulated tensor. Data types supported: S16. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, uint32_t shift, ICLTensor *accum); }; } // namespace arm_compute #endif /*ARM_COMPUTE_CLACCUMULATEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h index 5b65a54824..d25480cd60 100644 --- a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h @@ -29,13 +29,12 @@ namespace arm_compute { class ICLTensor; -class CLCoreRuntimeContext; /** Interface for the activation layer kernel. 
*/ class CLActivationLayerKernel : public ICLKernel { public: /** Default constructor */ - CLActivationLayerKernel(CLCoreRuntimeContext *ctx = nullptr); + CLActivationLayerKernel(); /** Prevent instances of this class from being copied (As this class contains pointers) */ CLActivationLayerKernel(const CLActivationLayerKernel &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ @@ -56,6 +55,17 @@ public: * @param[in] act_info Activation layer information. */ void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info); + /** Set the input and output tensor. + * + * @note If the output tensor is a nullptr, the activation function will be performed in-place + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] act_info Activation layer information. + */ + void configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info); /** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayerKernel * * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result @@ -74,7 +84,6 @@ private: ICLTensor *_input; ICLTensor *_output; bool _run_in_place; - CLCoreRuntimeContext *_ctx; }; } // namespace arm_compute #endif /*ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h b/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h index 7f4cfe3edc..831cee5e58 100644 --- a/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -65,6 +65,18 @@ public: * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported. */ void configure(const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: S32/F16/F32. + * @param[in] prev_output Destination tensor of the previous iterations of @ref CLArgMinMaxLayerKernel. Data types supported: U32/S32 + * Has to be nullptr for the first iteration + * @param[out] output Destination tensor. Data types supported: U32/S32 + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 + * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op); /** Static function to check if given info will lead to a valid configuration of @ref CLArgMinMaxLayerKernel. 
* diff --git a/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h index 3711617959..06764302f4 100644 --- a/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h @@ -61,6 +61,18 @@ public: * */ void configure(const ICLTensor *input, unsigned int batch_offset, ICLTensor *output); + /** Initialise the kernel's inputs and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[in] batch_offset The offset on axis # 3. + * @param[in,out] output Output tensor. Data types supported: Same as @p input. + * + * @note: The output tensor's low two dimensions can't be smaller than the input one's. + * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. + * + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int batch_offset, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLBatchConcatenateLayerKernel * * @param[in] input Input tensor info. Data types supported: All. diff --git a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h index 7afa0e2784..564b21680b 100644 --- a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -65,6 +65,25 @@ public: */ void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, const ICLTensor *gamma = nullptr, float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); + /** Set the input and output tensors. + * + * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. + * 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input + * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input + * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. 
+ */ + void configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, const ICLTensor *gamma = nullptr, + float epsilon = 0.001f, + ActivationLayerInfo act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLBatchNormalizationLayerKernel * * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result. diff --git a/arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h index 21197c26d0..f9289eab73 100644 --- a/arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h @@ -54,6 +54,14 @@ public: * @param[out] output Tensor output. Data types supported: same as @p input */ void configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output); + /** Initialise the kernel's inputs and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output); /** Initialise the kernel's inputs and output (Static block shape). * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. @@ -62,6 +70,15 @@ public: * @param[out] output Tensor output. Data types supported: same as @p input */ void configure(const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output); + /** Initialise the kernel's inputs and output (Static block shape). + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayerKernel * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. diff --git a/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h b/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h index 0aa2228a48..6c60bc0f33 100644 --- a/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h +++ b/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -55,6 +55,14 @@ public: * @param[out] output Destination tensor. Data types supported: U8. */ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + /** Set the inputs and output images + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. 
+ */ + void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h b/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h index a7b00dd8df..0522841e73 100644 --- a/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h +++ b/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -44,6 +44,13 @@ public: * @param[out] output Destination tensor. Data types supported: U8. */ void configure(const ICLTensor *input, ICLTensor *output); + /** Set the inputs and output images. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLBITWISENOTKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h b/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h index 5764cf5f90..151f19d374 100644 --- a/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h +++ b/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -55,6 +55,14 @@ public: * @param[out] output Destination tensor. Data types supported: U8. */ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + /** Set the inputs and output images + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h b/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h index c1e2e4b744..03c1e05da4 100644 --- a/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h +++ b/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -55,6 +55,14 @@ public: * @param[out] output Destination tensor. Data types supported: U8. */ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + /** Set the inputs and output images + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. 
+ */ + void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h b/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h index bd1645573a..ffa63bd5a4 100644 --- a/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h +++ b/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -59,6 +59,19 @@ public: * */ void configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. + * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input + * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. + * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input + * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. + * + * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. + * + */ + void configure(CLCompileContext &compile_context, const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLBoundingBoxTransform * diff --git a/arm_compute/core/CL/kernels/CLBox3x3Kernel.h b/arm_compute/core/CL/kernels/CLBox3x3Kernel.h index 359b227850..572ae87d9a 100644 --- a/arm_compute/core/CL/kernels/CLBox3x3Kernel.h +++ b/arm_compute/core/CL/kernels/CLBox3x3Kernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -43,6 +43,14 @@ public: * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /**Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); //Inherited methods overriden: BorderSize border_size() const override; diff --git a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h index 2d348dd5a6..67c23dd811 100644 --- a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h +++ b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -54,6 +54,18 @@ public: * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm. */ void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type); + /** Initialise the kernel's sources, destinations and border mode. + * + * @note gx, gy and mag must all be the same size (either 16 or 32). + * + * @param[in] compile_context The compile context to be used. + * @param[in] gx Source tensor - Gx component. Data types supported: S16/S32. + * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx. + * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy. + * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8. + * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; @@ -90,6 +102,16 @@ public: * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ void configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined); + /** Initialise the kernel's sources, destination and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32. + * @param[in] phase Source tensor - Quantized phase. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U16/U32. + * @param[in] lower_thr Lower threshold. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; @@ -129,6 +151,24 @@ public: */ void configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter); + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] upper_thr Upper threshold used for the hysteresis + * @param[in] lower_thr Lower threshold used for the hysteresis + * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32. + * Expected to be initialized to 0 before each run. + * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32 + * Expected to be initialized to 0 before each run. + * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32. + * Expected to be initialized to 0 before each run. + * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8. + * Expected to be initialized to 0 before each run. 
+ */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, + ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLChannelCombineKernel.h b/arm_compute/core/CL/kernels/CLChannelCombineKernel.h index ae5658fba4..60d0bd4a45 100644 --- a/arm_compute/core/CL/kernels/CLChannelCombineKernel.h +++ b/arm_compute/core/CL/kernels/CLChannelCombineKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -60,6 +60,16 @@ public: * @param[out] output The single planar output tensor. */ void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output); + /** Configure function's inputs and outputs. + * + * @param[in] compile_context The compile context to be used. + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format. + * @param[out] output The single planar output tensor. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output); /** Configure function's inputs and outputs. * * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. @@ -68,6 +78,15 @@ public: * @param[out] output The multi planar output tensor. */ void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output); + /** Configure function's inputs and outputs. + * + * @param[in] compile_context The compile context to be used. + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[out] output The multi planar output tensor. + */ + void configure(CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLChannelExtractKernel.h b/arm_compute/core/CL/kernels/CLChannelExtractKernel.h index 371f17ff2b..1f2cc8900a 100644 --- a/arm_compute/core/CL/kernels/CLChannelExtractKernel.h +++ b/arm_compute/core/CL/kernels/CLChannelExtractKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -58,6 +58,14 @@ public: * @param[out] output Destination tensor. Must be of U8 format. */ void configure(const ICLTensor *input, Channel channel, ICLTensor *output); + /** Set the input and output of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 + * @param[in] channel Channel to extract. + * @param[out] output Destination tensor. Must be of U8 format. 
+ */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, Channel channel, ICLTensor *output); /** Set the input and output of the kernel * * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444 @@ -65,6 +73,14 @@ public: * @param[out] output Single-planar 2D destination image. Must be of U8 format. */ void configure(const ICLMultiImage *input, Channel channel, ICLImage *output); + /** Set the input and output of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444 + * @param[in] channel Channel to extract. + * @param[out] output Single-planar 2D destination image. Must be of U8 format. + */ + void configure(CLCompileContext &compile_context, const ICLMultiImage *input, Channel channel, ICLImage *output); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h b/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h index 7e6589e8c0..921c20df10 100644 --- a/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h @@ -53,6 +53,14 @@ public: * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. */ void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_groups); + /** Configure function's inputs and outputs. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: Same as @p input + * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int num_groups); /** Static function to check if given info will lead to a valid configuration of @ref CLChannelShuffleLayerKernel * * @param[in] input Input tensor info. Data types supported: All. diff --git a/arm_compute/core/CL/kernels/CLCol2ImKernel.h b/arm_compute/core/CL/kernels/CLCol2ImKernel.h index b22c666a6b..6ef424853e 100644 --- a/arm_compute/core/CL/kernels/CLCol2ImKernel.h +++ b/arm_compute/core/CL/kernels/CLCol2ImKernel.h @@ -72,6 +72,16 @@ public: * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution */ void configure(const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups = 1); + /** Set the input and output of the kernel. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], + * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW + * @param[in] convolved_dims Output convolved dimensions. 
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups = 1); /** Static function to check if given info will lead to a valid configuration of @ref CLCol2ImKernel * * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 diff --git a/arm_compute/core/CL/kernels/CLColorConvertKernel.h b/arm_compute/core/CL/kernels/CLColorConvertKernel.h index 2e23a6234b..25b95eb42c 100644 --- a/arm_compute/core/CL/kernels/CLColorConvertKernel.h +++ b/arm_compute/core/CL/kernels/CLColorConvertKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -59,24 +59,54 @@ public: * U8 (if the formats of @p input is RGB888) */ void configure(const ICLTensor *input, ICLTensor *output); + /** Set the input and output of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 + * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), + * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/), + * U8 (if the formats of @p input is RGB888) + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Set the input and output of the kernel * * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888 */ void configure(const ICLMultiImage *input, ICLImage *output); + /** Set the input and output of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV + * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888 + */ + void configure(CLCompileContext &compile_context, const ICLMultiImage *input, ICLImage *output); /** Set the input and output of the kernel * * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888) */ void configure(const ICLImage *input, ICLMultiImage *output); + /** Set the input and output of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 + * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888) + */ + void configure(CLCompileContext &compile_context, const ICLImage *input, ICLMultiImage *output); /** Set the input and output of the kernel * * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) */ void configure(const ICLMultiImage *input, ICLMultiImage *output); + /** Set the input and output of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Multi-planar source image. 
Formats supported: NV12/NV21/IYUV + * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) + */ + void configure(CLCompileContext &compile_context, const ICLMultiImage *input, ICLMultiImage *output); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLComparisonKernel.h b/arm_compute/core/CL/kernels/CLComparisonKernel.h index a9c463901d..15779938b2 100644 --- a/arm_compute/core/CL/kernels/CLComparisonKernel.h +++ b/arm_compute/core/CL/kernels/CLComparisonKernel.h @@ -56,6 +56,15 @@ public: * @param[in] operation Comparison operation to use. */ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation); + /** Set the inputs and output tensors + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Source tensor. Data types supported: All. + * @param[in] input2 Source tensor. Data types supported: Same as @p input1. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] operation Comparison operation to use. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation); /** Static function to check if given info will lead to a valid configuration of @ref CLComparisonKernel * * @param[in] input1 Source tensor. Data types supported: All. diff --git a/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h b/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h index b204eaa2ac..f7e212e1e4 100644 --- a/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h +++ b/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h @@ -61,6 +61,15 @@ public: * @param[in] data_layout The data layout the weights have been trained in. */ void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout); + /** Set the input and output tensor. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. + * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. + * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). + * @param[in] data_layout The data layout the weights have been trained in. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout); /** Static function to check if given info will lead to a valid configuration of @ref CLConvertFullyConnectedWeightsKernel * * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All. diff --git a/arm_compute/core/CL/kernels/CLConvolutionKernel.h b/arm_compute/core/CL/kernels/CLConvolutionKernel.h index 089a8cd10e..e1cdc88007 100644 --- a/arm_compute/core/CL/kernels/CLConvolutionKernel.h +++ b/arm_compute/core/CL/kernels/CLConvolutionKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -61,6 +61,16 @@ public: * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
*/ void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); // Inherited methods overridden: BorderSize border_size() const override; @@ -94,6 +104,15 @@ public: * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined); // Inherited methods overridden: BorderSize border_size() const override; @@ -124,6 +143,17 @@ public: * @param[in] data_type Data type to use for intermeidate result. @sa data_type_for_convolution */ void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: S16. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] scale Scale of the convolution matrix. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + * @param[in] data_type Data type to use for intermeidate result. @sa data_type_for_convolution + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32); // Inherited methods overridden: BorderSize border_size() const override; @@ -168,6 +198,18 @@ public: * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8, S16. 
+ * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] width Width of convolution matrix (Number of columns) + * @param[in] height Height of convolution matrix (Number of rows) + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLCopyKernel.h b/arm_compute/core/CL/kernels/CLCopyKernel.h index 50bf38966d..1774f8ccad 100644 --- a/arm_compute/core/CL/kernels/CLCopyKernel.h +++ b/arm_compute/core/CL/kernels/CLCopyKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -53,6 +53,15 @@ public: * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. */ void configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding = PaddingList(), Window *output_window = nullptr); + /** Initialize the kernel's input, output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] padding (Optional) Padding to be applied to the input tensor + * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding = PaddingList(), Window *output_window = nullptr); /** Static function to check if given info will lead to a valid configuration of @ref CLCopyKernel * * @param[in] input Source tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. diff --git a/arm_compute/core/CL/kernels/CLCropKernel.h b/arm_compute/core/CL/kernels/CLCropKernel.h index bcce9c15ea..103986a5f8 100644 --- a/arm_compute/core/CL/kernels/CLCropKernel.h +++ b/arm_compute/core/CL/kernels/CLCropKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -58,6 +58,21 @@ public: * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr. */ void configure(const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, Window *output_window = nullptr); + /** Configure kernel + * + * @note Supported tensor rank: up to 4 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data type supported: U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC. + * @param[out] output Destination tensor. Data type supported: F32 + * @param[in] start Coordinates of where to start cropping the image. + * @param[in] end Coordinates of where to end cropping the image. + * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input. 
+ * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. + * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, + Window *output_window = nullptr); /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel * diff --git a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h index a1c6bbdafe..7e8a45fd8f 100644 --- a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h +++ b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h @@ -55,6 +55,14 @@ public: * @param[in] info Contains padding and stride information described in @ref PadStrideInfo. */ void configure(const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Destination tensor. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] info Contains padding and stride information described in @ref PadStrideInfo. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample * * @param[in] input Source tensor info. Data types supported: All. diff --git a/arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h b/arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h index 6c90bd6c7f..daeb8c1f9c 100644 --- a/arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h +++ b/arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h @@ -68,6 +68,19 @@ public: * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported. */ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info); + /** Initialise the kernel's source and destination. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. + * @param[in] bias Bias tensor to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[out] output Output tensor with the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size] + * Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] input_info Deconvolution input tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] weights_info Deconvolution weights tensor info. 
Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, + const PadStrideInfo &deconv_info); /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionReshapeOutputKernel. * diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h index e55dd5dee8..7b594417d6 100644 --- a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h @@ -61,6 +61,18 @@ public: * */ void configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output); + /** Initialise the kernel's inputs and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] depth_offset The offset on the Z axis. + * @param[in,out] output Output tensor. Data types supported: Same as @p input. + * + * @note: The output tensor's low two dimensions can't be smaller than the input one's. + * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. + * + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int depth_offset, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel * * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 diff --git a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h index fb65aa5392..8bbf9b3dce 100644 --- a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -56,6 +56,26 @@ public: * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. */ void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift); + /** Set the input and output of the kernel. + * + * Valid conversions Input -> Output : + * + * - QSYMM8_PER_CHANNEL -> QASYMM8 (ATTENTION: it is the user's responsibility to keep track of the quantization info in the TensorInfo meta-data) + * - U8 -> S8, U16, S16, U32, S32, F16, F32 + * - U16 -> U8, S8, S16, U32, S32, F16, F32 + * - S16 -> U8, S8, U16, U32, S32, F16, F32 + * - U32 -> U8, S8, U16, S16, S32, F16, F32 + * - S32 -> U8, S8, U16, S16, U32, F16, F32 + * - F16 -> U8, S8, U16, S16, U32, F32 + * - F32 -> U8, S8, U16, S16, U32, F16 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to convert. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32. + * @param[out] output The output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] policy Conversion policy + * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. 
+ */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift); /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConvertLayerKernel * * @param[in] input Source tensor info. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32. diff --git a/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h index 637e5fa960..541506b521 100644 --- a/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h @@ -54,6 +54,14 @@ public: * @param[in] block_shape Block shape value. */ void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape); + /** Initialise the kernel's inputs and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[out] output Tensor output. Data types supported: same as @p input + * @param[in] block_shape Block shape value. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape); /** Static function to check if given info will lead to a valid configuration of @ref CLDepthToSpaceLayerKernel. * * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h index 3d1b91c81f..f68fde4737 100644 --- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -57,6 +57,27 @@ public: void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override; + /** Initialize the function's source, destination, conv and border_size. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. + * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
+ * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), + const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override; /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NCHWKernel * * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h index 9e74be76d8..f9fda0a42c 100644 --- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -58,6 +58,27 @@ public: void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override; + /** Initialize the function's source, destination, conv and border_size. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. + * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. 
In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), + const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override; /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel * * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED. diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h index 7e19ed6285..db26b4a06f 100644 --- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h @@ -68,6 +68,28 @@ public: void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info, const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U), const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); + /** Initialize the function's source, destination and parameters + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC + * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, N, M]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8. + * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread + * @param[in] dwc_info Depthwise convolution layer info + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). 
Supported data types: S32 + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info, + const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U), + const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerNativeKernel * * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h index 97225c7c33..e7fc6f8d81 100644 --- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h @@ -52,6 +52,14 @@ public: * @param[in] info Depthwise convolution information to reshape the input tensor. */ void configure(const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info); + /** Initialize the function's source and destination. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC + * @param[out] output The output tensor of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p weights. + * @param[in] info Depthwise convolution information to reshape the input tensor. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel * diff --git a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h b/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h index 78b5c14128..4cb1339300 100644 --- a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h @@ -52,6 +52,13 @@ public: * @param[out] output Destination tensor. Data types supported: F16/F32. */ void configure(const ICLTensor *input, ICLTensor *output); + /** Set the input, output, min and max. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[out] output Destination tensor. Data types supported: F16/F32. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayerKernel * * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. diff --git a/arm_compute/core/CL/kernels/CLDerivativeKernel.h b/arm_compute/core/CL/kernels/CLDerivativeKernel.h index d6be5c2b86..5d5ad860f3 100644 --- a/arm_compute/core/CL/kernels/CLDerivativeKernel.h +++ b/arm_compute/core/CL/kernels/CLDerivativeKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -56,6 +56,17 @@ public: * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + /** Initialise the kernel's sources, destination and border + * + * @note At least one of output_x or output_y must be set + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLDilateKernel.h b/arm_compute/core/CL/kernels/CLDilateKernel.h index d131b34e00..9c41a84b31 100644 --- a/arm_compute/core/CL/kernels/CLDilateKernel.h +++ b/arm_compute/core/CL/kernels/CLDilateKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -43,6 +43,14 @@ public: * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /**Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); // Inherited methods overridden: BorderSize border_size() const override; diff --git a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h index 5bf9a5d57f..f1409b6339 100644 --- a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h @@ -68,6 +68,27 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. */ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info); + /** Set the input, weights, biases and output tensors. + * + * @note: DirectConvolution only works in the following configurations: + * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 + * 3x3 convolution with stride_x = 1/2, stride_y = 1/2 + * 5x5 convolution with stride_x = 1/2, stride_y = 1/2 + * 9x9 convolution with stride_x = 1/2, stride_y = 1/2, data_layout=NHWC + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights Weights tensor. 
Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * The 3rd dimension must be the same as the input's volume 3rd dimension. + * Data type supported: Same as @p input. + * @param[in] biases Biases tensor. Biases are 1D tensor with dimension [OFM]. + * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type + * @param[out] output Output tensor. + * The 3rd dimension must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info); /** Static function to check if given info will lead to a valid configuration of @ref CLDirectConvolutionLayerKernel * * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], diff --git a/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h b/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h index 0e4d2ec0be..1f76992b96 100644 --- a/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -43,6 +43,14 @@ public: * @param[in] op Element wise unary operation to perform. */ void configure(const ICLTensor *input, ICLTensor *output, const ElementWiseUnary &op); + /** Initialise the kernel's inputs, output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input First tensor input. Data types supported: F16/F32. + * @param[out] output Output tensor. Data types supported: Same as @p input. + * @param[in] op Element wise unary operation to perform. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ElementWiseUnary &op); /** Static function to check if given info will lead to a valid configuration of @ref CLElementWiseUnaryLayerKernel * * @param[in] input First tensor input info. Data types supported: F16/F32. diff --git a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h b/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h index 85961f28bc..2f1060126a 100644 --- a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h +++ b/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h @@ -96,6 +96,10 @@ protected: * */ void configure_common(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + /** Common configure function for element-wise operators with no additional options (e.g., Div, Min, Max, SquaredDiff) + * + */ + void configure_common(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); ActivationLayerInfo _act_info; @@ -124,6 +128,18 @@ public: * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*/ void configure(ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ConvertPolicy &policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Initialise the kernel with the given arithmetic operation, inputs, output and conversion policy + * + * @param[in] compile_context The compile context to be used. + * @param[in] op Arithmetic operation to be executed. + * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32. + * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. + * @param[out] output Output tensor. Data types supported: Same as @p input1. + * @param[in] policy Policy to use to handle overflow. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(CLCompileContext &compile_context, ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ConvertPolicy &policy, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel * @@ -169,6 +185,17 @@ public: * @param[in] act_info (Optional) Activation layer information in case of a fused activation. */ void configure(ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Initialise the kernel with the given arithmetic operation, inputs and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] op Arithmetic operation to be executed. + * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32. + * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. + * @param[out] output Output tensor. Data types supported: Same as @p input1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(CLCompileContext &compile_context, ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel * diff --git a/arm_compute/core/CL/kernels/CLErodeKernel.h b/arm_compute/core/CL/kernels/CLErodeKernel.h index 4fcf70b0e6..8ba6ff8408 100644 --- a/arm_compute/core/CL/kernels/CLErodeKernel.h +++ b/arm_compute/core/CL/kernels/CLErodeKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -43,6 +43,14 @@ public: * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); // Inherited methods overridden: BorderSize border_size() const override; diff --git a/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h b/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h index 42eff76d21..eac03ff868 100644 --- a/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h +++ b/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -57,6 +57,15 @@ public: * @param[in] config Kernel configuration. */ void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] idx Digit reverse index tensor. Data type supported: U32 + * @param[in] config Kernel configuration. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config); /** Static function to check if given info will lead to a valid configuration of @ref CLFFTDigitReverseKernel * * @param[in] input Source tensor info. Data types supported: F32. diff --git a/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h b/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h index b88ab1af67..85bf4cce66 100644 --- a/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h +++ b/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -60,6 +60,16 @@ public: * @param[in] config FFT descriptor metadata. */ void configure(ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config); + /** Set the input and output tensors. + * + * @note If the output tensor is nullptr, the FFT will be performed in-place + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. Can be nullptr. Data type supported: same as @p input + * @param[in] config FFT descriptor metadata. + */ + void configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config); /** Static function to check if given info will lead to a valid configuration of @ref CLFFTRadixStageKernel * * @param[in] input Source tensor info. Data types supported: F32. diff --git a/arm_compute/core/CL/kernels/CLFFTScaleKernel.h b/arm_compute/core/CL/kernels/CLFFTScaleKernel.h index 3a069fe6c0..cd4fe58b9c 100644 --- a/arm_compute/core/CL/kernels/CLFFTScaleKernel.h +++ b/arm_compute/core/CL/kernels/CLFFTScaleKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -56,6 +56,14 @@ public: * @param[in] config Kernel configuration */ void configure(ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. 
Data type supported: same as @p input + * @param[in] config Kernel configuration + */ + void configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config); /** Static function to check if given info will lead to a valid configuration of @ref CLFFTScaleKernel * * @param[in] input Source tensor info. Data types supported: F32. diff --git a/arm_compute/core/CL/kernels/CLFastCornersKernel.h b/arm_compute/core/CL/kernels/CLFastCornersKernel.h index 3cca39e007..2a6102036f 100644 --- a/arm_compute/core/CL/kernels/CLFastCornersKernel.h +++ b/arm_compute/core/CL/kernels/CLFastCornersKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -66,6 +66,16 @@ public: * @param[in] border_mode Strategy to use for borders. */ void configure(const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode); + /** Initialise the kernel. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source image. Data types supported: U8. + * @param[out] output Output image. Data types supported: U8. + * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. + * @param[in] non_max_suppression True if non-maxima suppression is applied, false otherwise. + * @param[in] border_mode Strategy to use for borders. + */ + void configure(CLCompileContext &compile_context, const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode); // Inherited methods overridden void run(const Window &window, cl::CommandQueue &queue) override; @@ -101,6 +111,15 @@ public: * @param[out] num_buffers Number of keypoints to store the results. */ void configure(const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers); + /** Initialise the kernel. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source image. Data types supported: U8. + * @param[in] update_number Flag to indicate whether we need to update the number of corners + * @param[out] corners Array of keypoints to store the results. + * @param[out] num_buffers Number of keypoints to store the results. + */ + void configure(CLCompileContext &compile_context, const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLFillBorderKernel.h b/arm_compute/core/CL/kernels/CLFillBorderKernel.h index 36f54c54d4..226b611bcb 100644 --- a/arm_compute/core/CL/kernels/CLFillBorderKernel.h +++ b/arm_compute/core/CL/kernels/CLFillBorderKernel.h @@ -57,6 +57,15 @@ public: * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ void configure(ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] tensor Tensor to process. Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32. + * @param[in] border_size Size of the border to fill in elements. + * @param[in] border_mode Border mode to use for the convolution.
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(CLCompileContext &compile_context, ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); /** Function to set the constant value on fill border kernel depending on type. * diff --git a/arm_compute/core/CL/kernels/CLFlattenLayerKernel.h b/arm_compute/core/CL/kernels/CLFlattenLayerKernel.h index 1b7fdcc54f..b795e03a34 100644 --- a/arm_compute/core/CL/kernels/CLFlattenLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLFlattenLayerKernel.h @@ -52,6 +52,15 @@ public: * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input */ void configure(const ICLTensor *input, ICLTensor *output); + /** Set the input and output of the kernel. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input First input tensor to flatten with at least 3 dimensions. + * The dimensions above the third will be interpreted as batches. Data types supported: All. + * @param[out] output Output tensor with shape [w*h*d, input_batches] where: + * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLFlattenLayerKernel * * @param[in] input First input tensor to flatten with at least 3 dimensions. diff --git a/arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h b/arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h index aa60376768..2d62a576bb 100644 --- a/arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h +++ b/arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -65,6 +65,25 @@ public: void configure(const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias, const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr, float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); + /** Set the source, destination of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC + * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights + * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights + * @param[out] fused_weights Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights + * @param[out] fused_bias Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights + * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights + * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. 
Same as @p input_weights + * @note if nullptr, bn_beta is set to 0.0 + * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights + * @note if nullptr, bn_gamma is set to 1.0 + * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. + * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias, + const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr, + float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); /** Static function to check if given info will lead to a valid configuration of @ref CLFuseBatchNormalizationKernel * * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h index a8853d4c0c..e926f5ed36 100644 --- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -64,6 +64,17 @@ public: * @param[in] gemm_info (Optional) GEMM information used to retrieve the original dimensions of the input matrices */ void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMReshapeInfo &gemm_info = GEMMReshapeInfo()); + /** Initialise the kernel's input and output. + * + * @note This kernel should be used ONLY for Midgard architectures + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8 + * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 + * @param[in] gemm_info (Optional) GEMM information used to retrieve the original dimensions of the input matrices + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMReshapeInfo &gemm_info = GEMMReshapeInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyKernel * * @param[in] input0 Input tensor containing the LHS matrix. 
Data type supported: QASYMM8 diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h index e1191f265e..d100efdcb7 100644 --- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h @@ -58,6 +58,22 @@ public: * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices */ void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 + * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same as lhs_info.k0 + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, + const GEMMReshapeInfo &gemm_info); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyNativeKernel * * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h index 64a98128ce..9e3b198c8c 100644 --- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h @@ -65,6 +65,26 @@ public: * @note lhs_info.k0 must be equal to rhs_info.k0 */ void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4. + * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 + * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * lhs_info.transpose: false + * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. 
Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same as lhs_info.k0 + * rhs_info.transpose: true + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + * + * @note lhs_info.k0 must be equal to rhs_info.k0 + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, + const GEMMReshapeInfo &gemm_info); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyReshapedKernel * * @param[in] input0 Input tensor info containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4. diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h index 7845d244eb..cc3c5f5186 100644 --- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h @@ -75,6 +75,33 @@ public: */ void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info, const ICLTensor *vector_sum_col = nullptr, const ICLTensor *vector_sum_row = nullptr, const ICLTensor *bias = nullptr, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0 + * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/S32. + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info. + * Only the following values are supported for LHS info: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * Only the following values are supported for RHS info: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same as lhs_info.k0 + * rhs_info.transpose: true + * @param[in] vector_sum_col (Optional) Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32 + * @param[in] vector_sum_row (Optional) Input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32 + * @param[in] bias (Optional) Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32. + * @param[in] output_multipliers (Optional) Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32. + * @param[in] output_shifts (Optional) Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32. 
+ */ + void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info, const ICLTensor *vector_sum_col = nullptr, + const ICLTensor *vector_sum_row = nullptr, const ICLTensor *bias = nullptr, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel * * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h index f0291044a1..d7266b2805 100644 --- a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -70,6 +70,22 @@ public: * @param[in] b_offset Offset to be added to each element of the matrix B. */ void configure(ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset, int32_t b_offset); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] mm_result Input tensor containing the result of @ref CLGEMMLowpMatrixMultiplyKernel. Data type supported: S32 + * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result + * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] k Number of matrix A columns or Matrix B rows + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + */ + void configure(CLCompileContext &compile_context, ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset, + int32_t b_offset); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel * * @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpOffsetContributionKernel. 
Data type supported: S32 diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h index 4094bc681e..02ed20e5af 100644 --- a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h @@ -71,6 +71,29 @@ public: */ void configure(const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output, int32_t k, int32_t a_offset, int32_t b_offset, const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpMatrixMultiplyKernel. Data type supported: S32 + * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result + * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED. + * @param[in] k Number of matrix A columns or Matrix B rows + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + * @param[in] output_stage GEMMLowp output stage info + * @param[in] output_multipliers Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32 + * @param[in] output_shifts Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32 + */ + void configure(CLCompileContext &compile_context, const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output, int32_t k, + int32_t a_offset, int32_t b_offset, + const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel * * @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpOffsetContributionKernel. Data type supported: S32 diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h index 439f569d07..0b5b22cafc 100644 --- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h @@ -67,6 +67,16 @@ public: * @param[in] info Output stage info. 
Used to pass the quantized output data type */ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] info Output stage info. Used to pass the quantized output data type + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel * * @param[in] input Input tensor. Data type supported: S32 diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h index 3378359d29..0d7d1c3390 100644 --- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h @@ -67,6 +67,16 @@ public: * @param[in] output_stage GEMMLowp output stage metadata. */ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] output_stage GEMMLowp output stage metadata. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleKernel * * @param[in] input Input tensor. Data type supported: S32 diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h index 72ca4c5455..2845d9259e 100644 --- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -68,6 +68,20 @@ public: * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0. */ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QSYMM16 + * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added + * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication + * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0. + * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16. + * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel * * @param[in] input Input tensor info. Data type supported: S32 diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h index 22ac8fae4a..a768b6fba0 100644 --- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -70,6 +70,22 @@ public: */ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QASYMM8_SIGNED + * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added + * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication + * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED + * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to 0 + * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.
Defaults to 0 + * Along with @p min, this value can be used to implement "rectified linear unit" activation functions + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, + int min = 0, int max = 0); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel * * @param[in] input Input tensor. Data type supported: S32 diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h index e8066e0c3e..e319c32c78 100644 --- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -70,6 +70,22 @@ public: */ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QASYMM8 + * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added + * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication + * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8 + * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8 + * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8. + * Along with @p min, this value can be used to implement "rectified linear unit" activation functions + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, + int min = 0, int max = 0); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel * * @param[in] input Input tensor. Data type supported: S32 diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h index 71681cf628..4b610fa6d0 100644 --- a/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h @@ -57,6 +57,18 @@ public: * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. */ virtual void configure(const ICLTensor *input, ICLTensor *output, const GEMMLowpReductionKernelInfo &info) = 0; + /** Initialise the kernel's input and output.
+ * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data type supported: S8 + * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k Number of matrix columns/rows depending on the type of reduction. + * - is_reshaped True if the matrix has been reshaped. + * - scalar Scalar value to multiply each reduced column/row by. + * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. + */ + virtual void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLowpReductionKernelInfo &info) = 0; protected: const ICLTensor *_input; @@ -82,6 +94,18 @@ public: * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. */ void configure(const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override; + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k Number of matrix columns/rows depending on the type of reduction. + * - is_reshaped True if the matrix has been reshaped. + * - scalar Scalar value to multiply each reduced column/row by. + * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override; /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixAReductionKernel * * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED @@ -119,6 +143,18 @@ public: * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. */ void configure(const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override; + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] mtx_b Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k Number of matrix columns/rows depending on the type of reduction. + * - is_reshaped True if the matrix has been reshaped. + * - scalar Scalar value to multiply each reduced column/row by. + * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override; /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixBReductionKernel * * @param[in] mtx_b Input tensor.
Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h index bff419cd35..037ec4d116 100644 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -50,6 +50,13 @@ public: * @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p input */ void configure(ICLTensor *accum, const ICLTensor *biases); + /** Set the accumulate buffer and the biases of the kernel. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32 + * @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p input + */ + void configure(CLCompileContext &compile_context, ICLTensor *accum, const ICLTensor *biases); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixAccumulateBiasesKernel * * @param[in] accum The accumulate tensor to convert. Data types supported: F16/F32 diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h index 82c4091c6e..fe34735fe4 100644 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -68,6 +68,23 @@ public: */ void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f, bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo()); + /** Initialise the kernel's input, output and alpha + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F16/F32 + * @param[in] input1 Input tensor containing the Matrix B. Data type supported: same as @p input0 + * @param[in] input2 Input tensor containing the Matrix C (bias). Can be nullptr. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta (Optional) Weight of vector C. Default value is 0. Only beta = 1 is currently supported. + * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel + * @param[in] reshape_info (Optional) GEMM reshape info. 
If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped + * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy + * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication + * + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f, + bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyKernel * * @param[in] input0 Input tensor containing the Matrix A info. Data types supported: F16/F32 diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h index 9bac8c9716..370ef8b3c8 100644 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -65,6 +65,27 @@ public: void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 Input tensor for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4. + * @param[in] input1 Input tensor for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. + * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. + * @param[out] output Output tensor. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of the matrix bias + * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. Only the following values are supported: + * lhs_info.m0: 1,2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same as lhs_info.k0 + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, + const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMKernelInfo &gemm_info); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyNativeKernel * * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4.
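For illustration, the sketch below shows how a caller might drive CLGEMMMatrixMultiplyNativeKernel through the compile-context overload added above. It is only a minimal example: the block sizes are arbitrary picks from the documented ranges, the helper name configure_native_gemm is hypothetical, and it assumes the caller already owns a valid CLCompileContext, allocated tensors and a populated GEMMKernelInfo.

#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"

using namespace arm_compute;

// Hypothetical helper: configures the native F32 GEMM kernel with an explicit compile context.
void configure_native_gemm(CLCompileContext &ctx, const ICLTensor *lhs, const ICLTensor *rhs, const ICLTensor *bias,
                           ICLTensor *dst, const GEMMKernelInfo &gemm_info)
{
    GEMMLHSMatrixInfo lhs_info;
    lhs_info.m0 = 4; // rows processed per work-item (supported: 1..8)
    lhs_info.k0 = 4; // accumulations per iteration (supported: 2,3,4,8,16)

    GEMMRHSMatrixInfo rhs_info;
    rhs_info.n0 = 4;           // columns processed per work-item (supported: 2,3,4,8,16)
    rhs_info.k0 = lhs_info.k0; // must match lhs_info.k0

    CLGEMMMatrixMultiplyNativeKernel gemm;
    // New interface: the compile context is passed explicitly as the first argument.
    gemm.configure(ctx, lhs, rhs, bias, dst, 1.0f /* alpha */, 0.0f /* beta */, lhs_info, rhs_info, gemm_info);
}

Running the configured kernel is unchanged by this patch; it is still enqueued as before, for example through CLScheduler::get().enqueue(gemm).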
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h index 449c333143..45df67673c 100644 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -76,6 +76,35 @@ public: void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info); + /** Initialise the kernel's input and output. + * + * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag. + * Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the + * multiplications. i.e. float c = (half)a * (half)b + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less or equal than 4 + * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3 + * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of the matrix bias + * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * lhs_info.transpose: false + * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: 2,3,4,8,16 + * rhs_info.transpose: true + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + * + * @note lhs_info.k0 must be equal to rhs_info.k0 + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, + const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMKernelInfo &gemm_info); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedKernel * * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less or equal than 4 diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h index b91b7ee4bd..b6285dd4db 100644 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -68,6 +68,27 @@ public: void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less or equal than 4. + * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. + * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of the matrix bias + * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported: + * lhs_info.m0: 1,2,3,4,5,6,7,8 + * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: + * rhs_info.k0: 2,3,4,8,16 + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.transpose: true,false + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, + const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMKernelInfo &gemm_info); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel * * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less or equal than 4. diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h index 8ee911dc0e..f31c5c2280 100644 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h @@ -51,6 +51,14 @@ public: * @param[out] output The output 2D tensor. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED. */ void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); + /** Set the input and output of the kernel. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input. + * @param[out] output The output 2D tensor. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixVectorMultiplyKernel * * @param[in] input0 The reshaped input tensor info. 
Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 diff --git a/arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h b/arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h index 7955b95b9a..e8e02ac281 100644 --- a/arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h @@ -61,6 +61,21 @@ public: * @param[in] reinterpret_input_as_3d (Optional) True if the input has to be reinterpreted as 3D tensor */ void configure(const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor. Data type supported: same as @p input + * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary + * information to reshape the input tensor. Only the following values are supported: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * lhs_info.v0: greater than 0 + * lhs_info.transpose: true, false + * lhs_info.interleave: true, false + * @param[in] reinterpret_input_as_3d (Optional) True if the input has to be reinterpreted as 3D tensor + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMReshapeLHSMatrixKernel * * @param[in] input Input tensor info. Data types supported: All diff --git a/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h b/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h index 19acd1f0e0..ada8889ac0 100644 --- a/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -59,6 +59,20 @@ public: * rhs_info.interleave: true, false */ void configure(const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor. Data type supported: same as @p input + * @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary + * information to reshape the input tensor. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false) + * rhs_info.h0: greater than 0 + * rhs_info.transpose: true, false + * rhs_info.interleave: true, false + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMReshapeRHSMatrixKernel * * @param[in] input Input tensor info. 
Data types supported: All diff --git a/arm_compute/core/CL/kernels/CLGatherKernel.h b/arm_compute/core/CL/kernels/CLGatherKernel.h index 937d744108..c91b95de89 100644 --- a/arm_compute/core/CL/kernels/CLGatherKernel.h +++ b/arm_compute/core/CL/kernels/CLGatherKernel.h @@ -55,6 +55,15 @@ public: * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 */ void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0); + /** Initialise the kernel's inputs and outputs + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All. + * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]) + * @param[out] output Destination tensor. Data type supported: Same as @p input + * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0); /** Static function to check if given info will lead to a valid configuration of @ref CLGatherKernel * diff --git a/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h b/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h index f377c527d0..7eb7f7ae89 100644 --- a/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h +++ b/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -43,6 +43,14 @@ public: * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); // Inherited methods overridden: BorderSize border_size() const override; diff --git a/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h b/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h index 4d0402de66..37a7727d7a 100644 --- a/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h +++ b/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -41,6 +41,14 @@ public: * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); private: //Make the configure method of the parent class private @@ -58,6 +66,14 @@ public: * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor(output of horizontal pass). Data types supported: S16. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); private: //Make the configure method of the parent class private diff --git a/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h b/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h index a3623f1a84..5acd7fd9b6 100644 --- a/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h +++ b/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -53,6 +53,13 @@ public: * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16. */ void configure(const ICLTensor *input, ICLTensor *output); + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; @@ -85,6 +92,13 @@ public: * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8. */ void configure(const ICLTensor *input, ICLTensor *output); + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U16. + * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h b/arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h index abd39a4b5d..abac4b74fe 100644 --- a/arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -54,6 +54,15 @@ public: * */ void configure(const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] anchors Source tensor. 
Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 + * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input + * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo + * + */ + void configure(CLCompileContext &compile_context, const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLComputeAllAnchorsKernel * diff --git a/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h b/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h index fb2019a176..1b1610e328 100644 --- a/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h +++ b/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -57,6 +57,15 @@ public: * @param[in] hog_info HOG's metadata */ void configure(const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info); + /** Initialise the kernel's inputs, output and HOG's metadata + * + * @param[in] compile_context The compile context to be used. + * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16. + * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8 + * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. Number of channels supported: equal to the number of histogram bins per cell + * @param[in] hog_info HOG's metadata + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; @@ -92,6 +101,14 @@ public: * @param[in] hog_info HOG's metadata */ void configure(const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info); + /** Initialise the kernel's input, output and HOG's metadata + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell + * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block + * @param[in] hog_info HOG's metadata + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h b/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h index 3018c56b65..8a326429a2 100644 --- a/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h +++ b/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -68,6 +68,21 @@ public: */ void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, uint16_t idx_class = 0); + /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block + * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel + * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects + * @param[in] num_detection_windows Number of detected objects + * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. + * It must be multiple of the hog->info()->block_stride() + * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane + * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, + const Size2D &detection_window_stride, float threshold = 0.0f, + uint16_t idx_class = 0); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue); diff --git a/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h b/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h index 3591afdf96..ed91aafb5e 100644 --- a/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h +++ b/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -67,6 +67,21 @@ public: void configure(const ICLImage *input1, const ICLImage *input2, ICLImage *output, int32_t block_size, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined); + /** Setup the kernel parameters + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Source image (gradient X). Data types supported S16, S32. (Must be the same as input2) + * @param[in] input2 Source image (gradient Y). Data types supported S16, S32. (Must be the same as input1) + * @param[out] output Destination image (harris score). Data types supported F32 + * @param[in] block_size The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7 + * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0) + * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). + * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(CLCompileContext &compile_context, const ICLImage *input1, const ICLImage *input2, ICLImage *output, + int32_t block_size, float norm_factor, float strength_thresh, float sensitivity, + bool border_undefined); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h index 5828280dc1..b9589593fa 100644 --- a/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h @@ -58,6 +58,15 @@ public: * */ void configure(const ICLTensor *input, unsigned int height_offset, ICLTensor *output); + /** Initialise the kernel's inputs and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[in] height_offset The starting offset on the Y axis for the output tensor. + * @param[out] output Output tensor. Data types supported: Same as @p input. + * + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int height_offset, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLHeightConcatenateLayerKernel * * @param[in] input Input tensor info. Data types supported: All. diff --git a/arm_compute/core/CL/kernels/CLHistogramKernel.h b/arm_compute/core/CL/kernels/CLHistogramKernel.h index bb1b773802..bb0d0b3c3c 100644 --- a/arm_compute/core/CL/kernels/CLHistogramKernel.h +++ b/arm_compute/core/CL/kernels/CLHistogramKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -54,6 +54,13 @@ public: * @param[out] output Destination distribution. */ void configure(const ICLImage *input, ICLDistribution1D *output); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source image. Data types supported: U8. + * @param[out] output Destination distribution. + */ + void configure(CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; @@ -85,6 +92,13 @@ public: * @param[out] output Destination distribution. */ void configure(const ICLImage *input, ICLDistribution1D *output); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source image. Data types supported: U8. + * @param[out] output Destination distribution. + */ + void configure(CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLIm2ColKernel.h b/arm_compute/core/CL/kernels/CLIm2ColKernel.h index 95675c8352..dddbf8d9dd 100644 --- a/arm_compute/core/CL/kernels/CLIm2ColKernel.h +++ b/arm_compute/core/CL/kernels/CLIm2ColKernel.h @@ -80,6 +80,22 @@ public: */ void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1); + /** Set the input and output of the kernel. 
+ * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, + * while every dimension above represents a batch. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, + const Size2D &dilation = Size2D(1U, 1U), + unsigned int num_groups = 1); /** Static function to check if given info will lead to a valid configuration of @ref CLIm2ColKernel * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], diff --git a/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h index 9982cc2f1c..93490d8e12 100644 --- a/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h @@ -58,6 +58,15 @@ public: * @param[in] info Kernel meta-data descriptor */ void configure(ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC + * In case of @p output tensor = nullptr this tensor will store the result of the normalization. + * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. + * @param[in] info Kernel meta-data descriptor + */ + void configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLInstanceNormalizationLayer. * diff --git a/arm_compute/core/CL/kernels/CLIntegralImageKernel.h b/arm_compute/core/CL/kernels/CLIntegralImageKernel.h index 42b0be33c5..8e06887dba 100644 --- a/arm_compute/core/CL/kernels/CLIntegralImageKernel.h +++ b/arm_compute/core/CL/kernels/CLIntegralImageKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -41,6 +41,13 @@ public: * @param[out] output Destination tensor, Data types supported: U32. */ void configure(const ICLTensor *input, ICLTensor *output); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output Destination tensor, Data types supported: U32. 
+ */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); }; /** Interface to run the vertical pass of the integral image kernel. */ @@ -62,6 +69,12 @@ public: * @param[in,out] in_out The input/output tensor. Data types supported: U32 */ void configure(ICLTensor *in_out); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] in_out The input/output tensor. Data types supported: U32 + */ + void configure(CLCompileContext &compile_context, ICLTensor *in_out); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h b/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h index b6f1be1995..e4b7af7984 100644 --- a/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -59,6 +59,18 @@ public: * @param[in] epsilon Lower bound value for the normalization. */ void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[in] sum Sum values tensor. Data types supported: same as @p input. + * Sum will have the same number of dimensions as input. + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 + * @param[in] epsilon Lower bound value for the normalization. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon); /** Static function to check if given info will lead to a valid configuration of @ref CLL2NormalizeLayerKernel. * diff --git a/arm_compute/core/CL/kernels/CLLKTrackerKernel.h b/arm_compute/core/CL/kernels/CLLKTrackerKernel.h index 1f24894aca..3e938c9658 100644 --- a/arm_compute/core/CL/kernels/CLLKTrackerKernel.h +++ b/arm_compute/core/CL/kernels/CLLKTrackerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -87,6 +87,21 @@ public: void configure(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale); + /** Initialise the kernel input and output + * + * @param[in] compile_context The compile context to be used. 
+ * @param[in] old_points Pointer to the @ref ICLKeyPointArray storing old key points + * @param[in] new_points_estimates Pointer to the @ref ICLKeyPointArray storing new estimates key points + * @param[out] old_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint old points + * @param[out] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points + * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used + * @param[in] level The pyramid level + * @param[in] num_levels The number of pyramid levels + * @param[in] pyramid_scale Scale factor used for generating the pyramid + */ + void configure(CLCompileContext &compile_context, const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, + ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, + bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; @@ -102,6 +117,13 @@ public: * @param[out] new_points Pointer to the @ref ICLKeyPointArray storing new key points */ void configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points); + /** Initialise the kernel input and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points + * @param[out] new_points Pointer to the @ref ICLKeyPointArray storing new key points + */ + void configure(CLCompileContext &compile_context, ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; @@ -137,6 +159,23 @@ public: ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, size_t window_dimension, size_t level); + /** Initialise the kernel input and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] old_input Pointer to the input old tensor. Data types supported: U8 + * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data types supported: S16 + * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. 
Data types supported: S16 + * @param[in] old_points_internal Pointer to the array of CLLKInternalKeypoint old points + * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint new points + * @param[out] coeff_table Pointer to the array holding the Spatial Gradient coefficients + * @param[out] old_ival Pointer to the array holding internal values + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] level The pyramid level + */ + void configure(CLCompileContext &compile_context, const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy, + ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, + ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, + size_t window_dimension, size_t level); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; @@ -175,6 +214,21 @@ public: */ void configure(const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level); + /** Initialise the kernel input and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] new_input Pointer to the input new tensor. Data types supported: U8 + * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint for new points + * @param[in] coeff_table Pointer to the array holding the Spatial Gradient coefficients + * @param[in] old_ival Pointer to the array holding internal values + * @param[in] termination The criteria to terminate the search of each keypoint. + * @param[in] epsilon The error for terminating the algorithm + * @param[in] num_iterations The maximum number of iterations before terminating the algorithm + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] level The pyramid level + */ + void configure(CLCompileContext &compile_context, const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, + Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h index 2f5624a0a4..757e3e4f86 100644 --- a/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h +++ b/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -55,6 +55,14 @@ public: * @param[out] output Output tensor to store the result. Data type supported: same as @p input0 */ void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); + /** Initialise the kernel's input, output and alpha + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 First input tensor. Data types supported: F32 + * @param[in] input1 Second input tensor. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result. 
Data type supported: same as @p input0 + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLLocallyConnectedMatrixMultiplyKernel * * @param[in] input0 First input tensor info. Data types supported: F32 diff --git a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h index 75b63f94ae..390da4958d 100644 --- a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h +++ b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -60,6 +60,20 @@ public: */ void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED); + /** Initialise the kernel's input, output. + * + * @note At least one of output1 or output2 must be set. + * + * @param[in] compile_context The compile context to be used. + * @param[in] gx The input gradient X tensor. Data types supported: S16. + * @param[in] gy The input gradient Y tensor. Data types supported: S16. + * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16. + * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8. + * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM. + * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, + MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h index 1f3129feff..ed0213abcc 100644 --- a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h +++ b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -59,6 +59,16 @@ public: * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong). */ void configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr); + /** Initialise the kernel's input and outputs. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input image. Data types supported: U8. + * @param[out] mean Input average pixel value. + * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong). + * @param[out] stddev (Optional) Output standard deviation of pixel values. + * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong). + */ + void configure(CLCompileContext &compile_context, const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr); /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevKernel. 
* * @param[in] input Input image info. Data types supported: U8. diff --git a/arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h b/arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h index ece0ec46e6..a21a6eed73 100644 --- a/arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h +++ b/arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -56,6 +56,17 @@ public: * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. */ void configure(ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f); + /** Initialise the kernel's input and outputs. + * + * @note If the output tensor is a nullptr, the normalization will be performed in-place. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor with 2 dimensions. In case of @p output tensor = nullptr, + * this tensor will store the result of the normalization. Data types supported: F16/F32. + * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input + * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. + */ + void configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f); /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevNormalizationKernel * * @param[in] input Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr, diff --git a/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h b/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h index 7fe5116782..df40fcf7e9 100644 --- a/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h +++ b/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -43,6 +43,14 @@ public: * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); // Inherited methods overridden: BorderSize border_size() const override; diff --git a/arm_compute/core/CL/kernels/CLMemsetKernel.h b/arm_compute/core/CL/kernels/CLMemsetKernel.h index 3e1eadfbbd..a2e61a1782 100644 --- a/arm_compute/core/CL/kernels/CLMemsetKernel.h +++ b/arm_compute/core/CL/kernels/CLMemsetKernel.h @@ -56,6 +56,14 @@ public: * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr. */ void configure(ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr); + /** Initialise the kernel's tensor and filling value + * + * @param[in] compile_context The compile context to be used. 
+ * @param[in,out] tensor Input tensor to fill. Supported data types: All. + * @param[in] constant_value The value used to fill the planes of the tensor + * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr. + */ + void configure(CLCompileContext &compile_context, ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr); /** Static function to check if given info will lead to a valid configuration of @ref CLMemsetKernel * * @param[in] tensor Source tensor info. Data types supported: All. diff --git a/arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h b/arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h index f24cebb985..7a31d71553 100644 --- a/arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -52,6 +52,14 @@ public: * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32. */ void configure(const ICLTensor *input, ICLTensor *output); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.Data types supported: F32. + * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor. + * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLMinMaxLayerKernel * * @param[in] input Input tensor info. Data types supported: F32. diff --git a/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h b/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h index 67b5b38384..e57f7587fa 100644 --- a/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h +++ b/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -55,6 +55,13 @@ public: * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32. */ void configure(const ICLImage *input, cl::Buffer *min_max); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input Image. Data types supported: U8/S16/F32. + * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32. + */ + void configure(CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; @@ -92,6 +99,19 @@ public: */ void configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr); + /** Initialise the kernel's input and outputs. 
+ * + * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input image. Data types supported: U8/S16/F32. + * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32. + * @param[out] min_max_count Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32 + * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations. + * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations. + */ + void configure(CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, + ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h b/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h index a6846b2541..b255f0cb90 100644 --- a/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h +++ b/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -52,6 +52,20 @@ public: void configure(const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, bool border_undefined); + /** Set the source, destination and border mode of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8 + * @param[out] output Destination tensor. Data types supported: U8 + * @param[in] function Non linear function to perform + * @param[in] mask_size Mask size. Supported sizes: 3, 5 + * @param[in] pattern Mask pattern + * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, + unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, + bool border_undefined); // Inherited methods overridden: BorderSize border_size() const override; diff --git a/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h b/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h index dd36a29c2a..084c77bf26 100644 --- a/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h +++ b/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -44,6 +44,14 @@ public: * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /** Initialise the kernel's sources, destinations and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8, F32. 
(Must be the same as the output tensor) + * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor) + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); // Inherited methods overridden: BorderSize border_size() const override; diff --git a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h index 43e219a4de..350b504d50 100644 --- a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -54,6 +54,16 @@ public: * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. */ void configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input. + * Data layouts supported: same as @p input. + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info); /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizationLayerKernel * * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], diff --git a/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h index 4334882fd8..addd3942eb 100644 --- a/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h @@ -57,6 +57,17 @@ public: * Data types supported: same as @p input */ void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels]. + * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input + * @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels. 
+ * Data types supported: same as @p input + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std); /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizePlanarYUVLayerKernel * * @param[in] input Source tensor info. 3 lower dimensions represent a single input with dimensions [width, height, channels]. diff --git a/arm_compute/core/CL/kernels/CLPadLayerKernel.h b/arm_compute/core/CL/kernels/CLPadLayerKernel.h index 6865ae6524..09f72088c4 100644 --- a/arm_compute/core/CL/kernels/CLPadLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLPadLayerKernel.h @@ -58,6 +58,19 @@ public: * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT). */ void configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT); + /** Set the input and output tensor. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: same as @p input + * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] + * specifies the front and the end padding in the i-th dimension. + * @param[in] constant_value (Optional) Constant value to be used for the padding. + * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT, + * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT). + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), + PaddingMode mode = PaddingMode::CONSTANT); /** Static function to check if given info will lead to a valid configuration of @ref CLPadLayerKernel * * @param[in] input Source tensor info. Data types supported: All. diff --git a/arm_compute/core/CL/kernels/CLPermuteKernel.h b/arm_compute/core/CL/kernels/CLPermuteKernel.h index bc51f105f5..6414edb113 100644 --- a/arm_compute/core/CL/kernels/CLPermuteKernel.h +++ b/arm_compute/core/CL/kernels/CLPermuteKernel.h @@ -56,6 +56,16 @@ public: * @param[in] perm Permutation vector */ void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm); + /** Set the input and output of the kernel. + * + * @note Arbitrary permutation vectors are supported with rank not greater than 4 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to permute. Data types supported: All. + * @param[in] output The output tensor. 
Data types supported: Same as @p input + * @param[in] perm Permutation vector + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PermutationVector &perm); /** Static function to check if given info will lead to a valid configuration of @ref CLPermuteKernel * * @note Arbitrary permutation vectors are supported with rank not greater than 4 diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h index 2a54a4bc48..a9cfcc57de 100644 --- a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h +++ b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h @@ -67,6 +67,20 @@ public: */ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. + * @param[in] input2 An input tensor. Data types supported: same as @p input1. + * @param[out] output The output tensor, Data types supported: same as @p input1. Note: U8 requires both inputs to be U8. + * @param[in] scale Scale to apply after multiplication. + * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. + * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate + * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale, + ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplicationKernel * * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. @@ -123,6 +137,15 @@ public: * @param[in] act_info (Optional) Activation layer information in case of a fused activation. */ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 An input tensor. Data types supported: F32. Number of channels supported: 2. + * @param[in] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * @param[out] output The output tensor, Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLComplexPixelWiseMultiplicationKernel * * @param[in] input1 An input tensor info. 
Data types supported: F32. Number of channels supported: 2. diff --git a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h index fdd10f3f66..4ab6955110 100644 --- a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h @@ -58,6 +58,16 @@ public: * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32. */ void configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr); + /** Set the input and output tensors. + * + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr); /** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayerKernel * * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. diff --git a/arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h b/arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h index 8376934a1f..89fd656581 100644 --- a/arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -58,6 +58,19 @@ public: * @param[in] aspect_ratios Aspect ratio values */ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max, cl::Buffer *aspect_ratios); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC. + * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1 + * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data types and layouts supported: same as @p input1 + * @param[in] info Prior box layer info. + * @param[in] min Minimum prior box values + * @param[in] max Maximum prior box values + * @param[in] aspect_ratios Aspect ratio values + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max, + cl::Buffer *aspect_ratios); /** Static function to check if given info will lead to a valid configuration of @ref CLPriorBoxLayerKernel * * @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC. 
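To make the new API concrete, here is a minimal usage sketch (illustrative only, not part of this patch) for one of the updated kernels, the CLQuantizationLayerKernel declared in the next hunk. It assumes the usual CLScheduler/CLKernelLibrary initialisation, builds a CLCompileContext from the library's context and device, and uses made-up shapes and quantization parameters; exact include paths and helpers may differ slightly between library versions.

#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

void configure_quantization_with_compile_context()
{
    // One-time CL setup: context, device and command queue.
    CLScheduler::get().default_init();

    // Build a compile context from the active context/device. Callers that already
    // hold a CLCompileContext (e.g. a parent function) would simply forward it.
    CLCompileContext compile_context(CLKernelLibrary::get().context(), CLKernelLibrary::get().get_device());

    // Illustrative tensors: an F32 source quantized to QASYMM8.
    CLTensor src;
    CLTensor dst;
    src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::QASYMM8, QuantizationInfo(1.f / 255.f, 0)));

    // New overload: same arguments as the existing configure(), with the compile context prepended.
    CLQuantizationLayerKernel kernel;
    kernel.configure(compile_context, &src, &dst);
}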
diff --git a/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h b/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h index 07c93d3306..a651529f2b 100644 --- a/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h @@ -57,6 +57,15 @@ public: * @note Output auto initialization is not supported by this kernel */ void configure(const ICLTensor *input, ICLTensor *output); + /** Set the input, output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. + * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. + * + * @note Output auto initialization is not supported by this kernel + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLQuantizationLayerKernel * * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. diff --git a/arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h b/arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h index 8dc7e4d3fc..8f4485a03b 100644 --- a/arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h @@ -64,6 +64,22 @@ public: * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. */ void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner + * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. + * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); /** Static function to check if given info will lead to a valid configuration of @ref CLROIAlignLayerKernel * * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. diff --git a/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h index 2936877390..8ba1b35171 100644 --- a/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -63,6 +63,21 @@ public: * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. */ void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: F16/F32. + * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner + * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16 + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLRangeKernel.h b/arm_compute/core/CL/kernels/CLRangeKernel.h index 27dd813010..5cc4a220ca 100644 --- a/arm_compute/core/CL/kernels/CLRangeKernel.h +++ b/arm_compute/core/CL/kernels/CLRangeKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -59,6 +59,15 @@ public: * @param[in] step The gap between each pair of values in the sequence. */ void configure(ICLTensor *output, float start, float end, float step); + /** Initialize the kernel's output tensor, start, end and step of the sequence. + * + * @param[in] compile_context The compile context to be used. + * @param[out] output Output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] start The starting value of the sequence. + * @param[in] end The ending (not including) value of the sequence. + * @param[in] step The gap between each pair of values in the sequence. + */ + void configure(CLCompileContext &compile_context, ICLTensor *output, float start, float end, float step); /** Static function to check if given info will lead to a valid configuration of @ref CLRangeKernel * * @param[in] output Output tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. diff --git a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h index 07ebd89819..bdab58bea1 100644 --- a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h +++ b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h @@ -59,6 +59,17 @@ public: * @param[in] width (Optional) In case of x-axis we also need to provide the width of the input image. */ void configure(const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width = 0); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. 
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 + * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX + * @param[in] width (Optional) In case of x-axis we also need to provide the width of the input image. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width = 0); /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperationKernel. * diff --git a/arm_compute/core/CL/kernels/CLRemapKernel.h b/arm_compute/core/CL/kernels/CLRemapKernel.h index ce094bc321..14f4b2ddb5 100644 --- a/arm_compute/core/CL/kernels/CLRemapKernel.h +++ b/arm_compute/core/CL/kernels/CLRemapKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -55,6 +55,17 @@ public: * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ void configure(const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined); + /** Initialize the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] map_x Map for X coordinates. Data types supported: F32. + * @param[in] map_y Map for Y coordinates. Data types supported: F32. + * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. + * @param[in] policy The interpolation type. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLReorgLayerKernel.h b/arm_compute/core/CL/kernels/CLReorgLayerKernel.h index c1bbb0a2ce..65304c1cc6 100644 --- a/arm_compute/core/CL/kernels/CLReorgLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLReorgLayerKernel.h @@ -55,6 +55,17 @@ public: * It defines the spatial distance between 2 consecutive pixels in the x and y direction */ void configure(const ICLTensor *input, ICLTensor *output, int32_t stride); + /** Initialize the kernel's input, output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32. + * @param[out] output Destination tensor with tensor shape: + * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has + * the same number of input elements. Data types supported: same as @p input. + * @param[in] stride Stride value to use for reorganizing the values in the output tensor. 
+ * It defines the spatial distance between 2 consecutive pixels in the x and y direction + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t stride); /** Static function to check if given info will lead to a valid configuration of @ref CLReorgLayerKernel * * @param[in] input Source tensor. Data types supported: All. diff --git a/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h b/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h index 77aedf32cc..f9588e818f 100644 --- a/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h @@ -53,6 +53,13 @@ public: * @param[out] output Destination tensor. Data type supported: Same as @p input */ void configure(const ICLTensor *input, ICLTensor *output); + /** Set the input and output of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data type supported: All. + * @param[out] output Destination tensor. Data type supported: Same as @p input + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLReshapeLayerKernel * diff --git a/arm_compute/core/CL/kernels/CLReverseKernel.h b/arm_compute/core/CL/kernels/CLReverseKernel.h index c8d10f7ace..b1547cfb9f 100644 --- a/arm_compute/core/CL/kernels/CLReverseKernel.h +++ b/arm_compute/core/CL/kernels/CLReverseKernel.h @@ -53,6 +53,14 @@ public: * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 */ void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *axis); + /** Initialise the kernel's inputs and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: Same as @p input + * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *axis); /** Static function to check if given info will lead to a valid configuration of @ref CLReverseKernel *
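Only the declarations appear in these headers; a natural back-compat pattern is for each existing configure() to remain a thin wrapper that forwards to its new compile-context overload, so current callers keep working unchanged. A sketch of that pattern for CLReverseKernel from the hunk above (not a verbatim excerpt from this patch; the get_compile_context() accessor used to obtain the library's default CLCompileContext is an assumption here):

#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/kernels/CLReverseKernel.h"

using namespace arm_compute;

// Legacy entry point kept for existing callers: forward to the new overload,
// using the library's default compile context (assumed accessor).
void CLReverseKernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *axis)
{
    configure(CLKernelLibrary::get().get_compile_context(), input, output, axis);
}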
diff --git a/arm_compute/core/CL/kernels/CLScaleKernel.h b/arm_compute/core/CL/kernels/CLScaleKernel.h index f93286b07d..02dfb3eccf 100644 --- a/arm_compute/core/CL/kernels/CLScaleKernel.h +++ b/arm_compute/core/CL/kernels/CLScaleKernel.h @@ -46,6 +46,19 @@ public: * @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false. */ void configure(const ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool align_corners = false); + /** Initialise the kernel's inputs, output and interpolation policy + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32 + * @param[out] output Destination tensor. Data types supported: Same as @p input + * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] policy Interpolation type to use + * @param[in] border_mode Selected border mode. + * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER + * @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, + SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool align_corners = false); /** Static function to check if given info will lead to a valid configuration of @ref CLScaleKernel * diff --git a/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h b/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h index 3d4e4ebb99..1cdb66715e 100644 --- a/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h +++ b/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -70,6 +70,17 @@ public: * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLSelectKernel.h b/arm_compute/core/CL/kernels/CLSelectKernel.h index be43add843..02f4cccfdb 100644 --- a/arm_compute/core/CL/kernels/CLSelectKernel.h +++ b/arm_compute/core/CL/kernels/CLSelectKernel.h @@ -60,6 +60,15 @@ public: * @param[in] output Output tensor. Data types supported: Same as @p x. */ void configure(const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output); + /** Initialise the kernel's inputs and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] c Condition input tensor. Data types supported: U8. + * @param[in] x First input tensor. Data types supported: All. + * @param[in] y Second input tensor. Data types supported: Same as @p x + * @param[out] output Output tensor. Data types supported: Same as @p x. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLSelectKernel * * @param[in] c Condition input tensor. Data types supported: U8. diff --git a/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h b/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h index 74d83468f3..3970c07b5a 100644 --- a/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h +++ b/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -56,6 +56,17 @@ public: * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h b/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h index 20a69ea780..0aff209931 100644 --- a/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h +++ b/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -57,6 +57,17 @@ public: * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; @@ -99,6 +110,18 @@ public: * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set and the corresponding input. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16. + * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. 
False if it's replicate or constant. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h b/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h index 3c224f7e3b..31809b1cf4 100644 --- a/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h +++ b/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -57,6 +57,17 @@ public: * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; @@ -99,6 +110,18 @@ public: * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set and the corresponding input. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32. + * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h index f64739ae32..800d909a1c 100644 --- a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -43,6 +43,13 @@ public: * @param[out] output Destination tensor. 
Data types supported: same as @p input */ void configure(const ICLTensor *input, ICLTensor *output); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 + * @param[out] output Destination tensor. Data types supported: same as @p input + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxKernel * * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 @@ -76,6 +83,16 @@ public: * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.0 */ void configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 + * @param[in] max Max values tensor. Data types supported: same as @p input + * @param[out] output Destination tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input + * @param[out] sum Sum of 1D logits tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input + * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.0 + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f); /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DShiftExpSumKernel * * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 @@ -124,6 +141,16 @@ public: * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. */ void configure(const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: F16/F32 + * @param[in,out] max Max values tensor. Data types supported: same as @p input + * @param[out] output Destination tensor. Data types supported: same as @p input + * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input + * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxShiftExpSumKernel * * @param[in] input Source tensor. Data types supported: F16/F32 @@ -182,6 +209,15 @@ public: * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. */ void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: S32/F16/F32 + * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input + * @param[out] output Destination tensor. 
Data types supported: QASYMM8 for S32 @p input, or same as @p input + * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DNormKernel * * @param[in] input Source tensor. Data types supported: S32/F16/F32 diff --git a/arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h b/arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h index 6295430d40..34f0b669c4 100644 --- a/arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h @@ -55,6 +55,15 @@ public: * @param[out] output Tensor output. Data types supported: same as @p input */ void configure(const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output); + /** Initialise the kernel's inputs and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 + * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output); /** Initialise the kernel's input and output. (Static block shape and paddings) * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. @@ -65,6 +74,17 @@ public: * @param[out] output Tensor output. Data types supported: same as @p input */ void configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output); + /** Initialise the kernel's input and output. (Static block shape and paddings) + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. + * @param[in] padding_left The left padding of the output tensor. + * @param[in] padding_right The right padding of the output tensor. + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayerKernel * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. diff --git a/arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h b/arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h index 085c337627..3f20f665dd 100644 --- a/arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h @@ -54,6 +54,14 @@ public: * @param[in] block_shape Block shape value. */ void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape); + /** Initialise the kernel's inputs and output. 
+ * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[out] output Tensor output. Data types supported: same as @p input + * @param[in] block_shape Block shape value. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape); /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToDepthLayerKernel. * * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. diff --git a/arm_compute/core/CL/kernels/CLStackLayerKernel.h b/arm_compute/core/CL/kernels/CLStackLayerKernel.h index 073c896035..19925c251d 100644 --- a/arm_compute/core/CL/kernels/CLStackLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLStackLayerKernel.h @@ -61,6 +61,20 @@ public: * */ void configure(const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output); + /** Initialise the kernel's inputs and output + * + * @note Supported input tensor rank: up to 4 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. + * @param[in] idx_input Index of the input tensor in the list of tensors to stack. + * All tensors in the list must have the same shape + * @param[in] num_tensors Number of tensors to stack + * @param[out] output Output tensor. Data types supported: Same as @p input. + * + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLStackLayerKernel * * @note Supported input tensor rank: up to 4 diff --git a/arm_compute/core/CL/kernels/CLStridedSliceKernel.h b/arm_compute/core/CL/kernels/CLStridedSliceKernel.h index 3bcabaf5e0..2e668821bd 100644 --- a/arm_compute/core/CL/kernels/CLStridedSliceKernel.h +++ b/arm_compute/core/CL/kernels/CLStridedSliceKernel.h @@ -67,6 +67,24 @@ public: void configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); + /** Configure kernel + * + * @note Supported tensor rank: up to 4 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data type supported: All. + * @param[out] output Destination tensor. Data type supported: Same as @p input + * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. 
+ * A slice of size 1 starting from starts[i] in the dimension must be preserved. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, + const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, + int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel * diff --git a/arm_compute/core/CL/kernels/CLTableLookupKernel.h b/arm_compute/core/CL/kernels/CLTableLookupKernel.h index 67d9cde6fe..9bbaf26d7a 100644 --- a/arm_compute/core/CL/kernels/CLTableLookupKernel.h +++ b/arm_compute/core/CL/kernels/CLTableLookupKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -42,6 +42,14 @@ public: * @param[out] output The output tensor. Data types supported: U8, S16. */ void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output); + /** Initialise the kernel's input, lut and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8, S16. + * @param[in] lut The input LUT. Data types supported: U8, S16. + * @param[out] output The output tensor. Data types supported: U8, S16. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLLut *lut, ICLTensor *output); }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLTABLELOOKUPKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLThresholdKernel.h b/arm_compute/core/CL/kernels/CLThresholdKernel.h index 3523e36fe5..79e9f01aa2 100644 --- a/arm_compute/core/CL/kernels/CLThresholdKernel.h +++ b/arm_compute/core/CL/kernels/CLThresholdKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -51,6 +51,19 @@ public: */ void configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper); + /**Initialise the kernel's input, output and threshold parameters. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] threshold Threshold. When the threshold type is RANGE, this is used as the lower threshold. + * @param[in] false_value value to set when the condition is not respected. + * @param[in] true_value value to set when the condition is respected. + * @param[in] type Thresholding type. Either RANGE or BINARY. + * @param[in] upper Upper threshold. Only used when the thresholding type is RANGE. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, uint8_t threshold, + uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper); }; } // namespace arm_compute #endif /*ARM_COMPUTE_NETHRESHOLDKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLTileKernel.h b/arm_compute/core/CL/kernels/CLTileKernel.h index 2b0c4305cb..1c9186c4dd 100644 --- a/arm_compute/core/CL/kernels/CLTileKernel.h +++ b/arm_compute/core/CL/kernels/CLTileKernel.h @@ -55,6 +55,16 @@ public: * */ void configure(const ICLTensor *input, ICLTensor *output, const Multiples &multiples); + /** Set the source, destination of the kernel + * + * @param[in] compile_context The compile context to be used. 
+ * @param[in] input Source tensor. Data type supported: All. + * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. + * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported). + * @param[out] output Destination tensor. Same as @p input + * + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Multiples &multiples); /** Static function to check if given info will lead to a valid configuration of @ref CLTileKernel * * @param[in] input Source tensor info. Data type supported: All. diff --git a/arm_compute/core/CL/kernels/CLTransposeKernel.h b/arm_compute/core/CL/kernels/CLTransposeKernel.h index 0adebde398..37bd716f3d 100644 --- a/arm_compute/core/CL/kernels/CLTransposeKernel.h +++ b/arm_compute/core/CL/kernels/CLTransposeKernel.h @@ -44,6 +44,13 @@ public: * @param[out] output Output tensor. Data type supported: Same as @p input */ void configure(const ICLTensor *input, ICLTensor *output); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: Same as @p input + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLTransposeKernel * * @param[in] input Input tensor. Data types supported: All. diff --git a/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h b/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h index 6f632aab46..556e5484d7 100644 --- a/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h @@ -55,6 +55,15 @@ public: * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. */ void configure(const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] info Contains stride information described in @ref Size2D. + * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy); /** Static function to check if given info will lead to a valid configuration of @ref CLUpsampleLayerKernel * * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. diff --git a/arm_compute/core/CL/kernels/CLWarpAffineKernel.h b/arm_compute/core/CL/kernels/CLWarpAffineKernel.h index e4d0be62c6..bd26705ea4 100644 --- a/arm_compute/core/CL/kernels/CLWarpAffineKernel.h +++ b/arm_compute/core/CL/kernels/CLWarpAffineKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -44,6 +44,16 @@ public: * @param[in] policy The interpolation type. 
*/ void configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy); + /** Initialize the function's source, destination, interpolation policy and border_mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] matrix The affine matrix. Must be 2x3 of type float. + * The matrix argument requires 9 values, the last 3 values are ignored. + * @param[in] policy The interpolation type. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy); // Inherited methods overridden: BorderSize border_size() const override; diff --git a/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h b/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h index cf6cb4cf64..4f4ff34f1d 100644 --- a/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h +++ b/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -42,6 +42,15 @@ public: * @param[in] policy The interpolation type. */ void configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy); + /** Initialize the function's source, destination, interpolation policy and border_mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] matrix The perspective matrix. Must be 3x3 of type float. + * @param[in] policy The interpolation type. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy); // Inherited methods overridden: BorderSize border_size() const override; diff --git a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h index 3e29feaa4d..f09eea958c 100644 --- a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h +++ b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -79,6 +79,20 @@ public: * Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it. */ void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1); + /** Set the input and output of the kernel. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, + * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All + * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with + * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr. + * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. + * @param[out] output The output tensor.
Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise. + * Data types supported: Same as @p input + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout + * Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1); /** Static function to check if given info will lead to a valid configuration of @ref CLWeightsReshapeKernel * * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h index 1a781446e6..50abf65983 100644 --- a/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h +++ b/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h @@ -57,6 +57,14 @@ public: * @param[out] output Output tensor. Data types supported: Same as @p input1. */ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + /** Initialise the kernel's inputs and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 First input tensor. Data types supported: All. + * @param[in] input2 Second input tensor. Data types supported: same as @p input1 + * @param[out] output Output tensor. Data types supported: Same as @p input1. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate2TensorsKernel * * @param[in] input1 First tensor info. Data types supported: All. diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h index 34b8257f00..f203602a12 100644 --- a/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h +++ b/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h @@ -59,6 +59,16 @@ public: * @param[out] output Output tensor. Data types supported: Same as @p input1. */ void configure(const ICLTensor *input1, const ICLTensor *input2, const ICLTensor *input3, const ICLTensor *input4, ICLTensor *output); + /** Initialise the kernel's inputs and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 First input tensor. Data types supported: All. + * @param[in] input2 Second input tensor. Data types supported: same as @p input1 + * @param[in] input3 Third input tensor. Data types supported: same as @p input1 + * @param[in] input4 Fourth input tensor. Data types supported: same as @p input1 + * @param[out] output Output tensor. Data types supported: Same as @p input1. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, const ICLTensor *input3, const ICLTensor *input4, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate4TensorsKernel * * @param[in] input1 First tensor info. Data types supported: All.
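Illustrative usage (not part of the patch): the header changes above only add overloads, so existing call sites keep compiling, while callers that manage their own CLCompileContext can pass it explicitly. A minimal sketch, assuming input1, input2 and output are already-initialised CL tensors:

    CLCompileContext &ctx = CLKernelLibrary::get().get_compile_context(); // or a caller-managed context
    CLWidthConcatenate2TensorsKernel concat_kernel;
    concat_kernel.configure(ctx, &input1, &input2, &output); // new overload with an explicit compile context
    // concat_kernel.configure(&input1, &input2, &output);   // legacy overload, behaviour unchanged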
diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h index 5dcae67c45..4564d774e3 100644 --- a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h @@ -58,6 +58,15 @@ public: * */ void configure(const ICLTensor *input, unsigned int width_offset, ICLTensor *output); + /** Initialise the kernel's inputs and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[in] width_offset The offset on the X axis. + * @param[in,out] output Output tensor. Data types supported: Same as @p input. + * + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int width_offset, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenateLayerKernel * * @param[in] input Input tensor info. Data types supported: All. diff --git a/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h index 4b1de0fb5c..bc7573dc9e 100644 --- a/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h +++ b/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -64,6 +64,25 @@ public: * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo */ void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info); + /** Set the input and output tensor. + * + * @note Winograd filter transform supports the following configurations for NCHW data layout + * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), + * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * @note Winograd filter transform supports the following configurations for NHWC data layout + * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * Strides: only unit strides + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32. + * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input + * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info); /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradFilterTransformKernel * * @note Winograd filter transform supports the following configurations for NCHW data layout diff --git a/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h index 03c215bf1f..6bb8d6e616 100644 --- a/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h +++ b/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -62,6 +62,25 @@ public: * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo. */ void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info); + /** Set the input and output of the kernel. + * + * @note Winograd input transform supports the following configurations for NCHW data layout + * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), + * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * @note Winograd input transform supports the following configurations for NHWC data layout + * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * Strides: only unit strides + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to transform. Data types supported: F16/F32 + * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input + * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info); /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradInputTransformKernel * * @note Winograd input transform supports the following configurations for NCHW data layout diff --git a/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h index c61e78674f..aab244bb90 100644 --- a/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h +++ b/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -66,6 +66,28 @@ public: * @param[in] act_info (Optional) Activation layer information in case of a fused activation. */ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Set the input and output tensor. + * + * @note Winograd output transform supports the following configurations for NCHW data layout + * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), + * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * @note Winograd output transform supports the following configurations for NHWC data layout + * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * Strides: only unit strides + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32. + * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input + * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape.
Data types supported: Same as @p input + * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradOutputTransformKernel * diff --git a/arm_compute/core/CL/kernels/CLYOLOLayerKernel.h b/arm_compute/core/CL/kernels/CLYOLOLayerKernel.h index 1fae0ed44b..c03fc94f91 100644 --- a/arm_compute/core/CL/kernels/CLYOLOLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLYOLOLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -62,6 +62,18 @@ public: * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) */ void configure(ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes); + /** Set the input and output tensor. + * + * @note If the output tensor is a nullptr, the activation function will be performed in-place + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] act_info Activation layer information. + * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) + */ + void configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes); /** Static function to check if given info will lead to a valid configuration of @ref CLYOLOLayerKernel * * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result diff --git a/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h index 9c62f96f6d..040ca157de 100644 --- a/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h +++ b/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -68,6 +68,27 @@ public: virtual void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) = 0; + /** Initialize the function's source, destination, conv and border_size. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. + * Data type supported: Same as @p input, QASYMM8/QSYMM8_PER_CHANNEL when input is QASYMM8. + * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. 
+ * Data type supported: Same as @p input, S32 when input is QASYMM8. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for QASYMM8. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + */ + virtual void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), + const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) = 0; protected: BorderSize _border_size; diff --git a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp index 557046e831..52ca1d1710 100644 --- a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp +++ b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -45,6 +45,11 @@ CLAbsoluteDifferenceKernel::CLAbsoluteDifferenceKernel() } void CLAbsoluteDifferenceKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output); +} + +void CLAbsoluteDifferenceKernel::configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16); @@ -63,7 +68,7 @@ void CLAbsoluteDifferenceKernel::configure(const ICLTensor *input1, const ICLTen build_opts.insert("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type())); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("absdiff", build_opts)); + _kernel = create_kernel(compile_context, "absdiff", build_opts); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 16; diff --git a/src/core/CL/kernels/CLAccumulateKernel.cpp b/src/core/CL/kernels/CLAccumulateKernel.cpp index 12ee210243..aa13b4a207 100644 --- a/src/core/CL/kernels/CLAccumulateKernel.cpp +++ b/src/core/CL/kernels/CLAccumulateKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -39,25 +39,35 @@ constexpr unsigned int num_elems_processed_per_iteration = 16; } // namespace void CLAccumulateKernel::configure(const ICLTensor *input, ICLTensor *accum) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, accum); +} + +void CLAccumulateKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *accum) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::S16); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("accumulate")); + _kernel = create_kernel(compile_context, "accumulate"); // Make sure _kernel is initialized before calling the parent's configure ICLSimple2DKernel::configure(input, accum, num_elems_processed_per_iteration); } void CLAccumulateWeightedKernel::configure(const ICLTensor *input, float alpha, ICLTensor *accum) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, alpha, accum); +} + +void CLAccumulateWeightedKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, float alpha, ICLTensor *accum) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::U8); ARM_COMPUTE_ERROR_ON(alpha < 0.0 || alpha > 1.0); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("accumulate_weighted")); + _kernel = create_kernel(compile_context, "accumulate_weighted"); // Set static kernel arguments unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters @@ -68,13 +78,18 @@ void CLAccumulateWeightedKernel::configure(const ICLTensor *input, float alpha, } void CLAccumulateSquaredKernel::configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, shift, accum); +} + +void CLAccumulateSquaredKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, uint32_t shift, ICLTensor *accum) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::S16); ARM_COMPUTE_ERROR_ON(shift > 15); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("accumulate_squared")); + _kernel = create_kernel(compile_context, "accumulate_squared"); // Set static kernel arguments unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp index 2ab0240076..15ae8e3d04 100644 --- a/src/core/CL/kernels/CLActivationLayerKernel.cpp +++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp @@ -115,12 +115,17 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen } } // namespace -CLActivationLayerKernel::CLActivationLayerKernel(CLCoreRuntimeContext *ctx) - : _input(nullptr), _output(nullptr), _run_in_place(false), _ctx(ctx) +CLActivationLayerKernel::CLActivationLayerKernel() + : _input(nullptr), _output(nullptr), _run_in_place(false) { } void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, act_info); +} + +void CLActivationLayerKernel::configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info) { 
ARM_COMPUTE_ERROR_ON_NULLPTR(input); @@ -226,7 +231,8 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act } // Create kernel - _kernel = create_opencl_kernel(_ctx, kernel_name, build_opts); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); + // Make sure _kernel is initialized before calling the parent's configure _input = input; _output = output; diff --git a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp index 21709a447f..4e33744094 100644 --- a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp @@ -115,6 +115,11 @@ CLArgMinMaxLayerKernel::CLArgMinMaxLayerKernel() } void CLArgMinMaxLayerKernel::configure(const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, prev_output, output, axis, op); +} + +void CLArgMinMaxLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (prev_output != nullptr) ? prev_output->info() : nullptr, output->info(), axis, op)); @@ -167,7 +172,7 @@ void CLArgMinMaxLayerKernel::configure(const ICLTensor *input, const ICLTensor * default: ARM_COMPUTE_ERROR("Not supported"); } - _kernel = static_cast(CLKernelLibrary::get().create_kernel("arg_min_max_" + kernel_axis_name, build_opts.options())); + _kernel = create_kernel(compile_context, "arg_min_max_" + kernel_axis_name, build_opts.options()); // Configure kernel window ICLKernel::configure_internal(std::get<1>(win_config), lws_hint); diff --git a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp index e6e2cb9c4f..610d8e8f62 100644 --- a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp @@ -85,6 +85,11 @@ CLBatchConcatenateLayerKernel::CLBatchConcatenateLayerKernel() } void CLBatchConcatenateLayerKernel::configure(const ICLTensor *input, unsigned int batch_offset, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, batch_offset, output); +} + +void CLBatchConcatenateLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int batch_offset, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), batch_offset, output->info())); @@ -111,7 +116,7 @@ void CLBatchConcatenateLayerKernel::configure(const ICLTensor *input, unsigned i } // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("concatenate", build_opts.options())); + _kernel = create_kernel(compile_context, "concatenate", build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window(input->info(), batch_offset, output->info()); diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp index a5c8b1cc6b..8776541536 100644 --- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp @@ -138,6 +138,13 @@ CLBatchNormalizationLayerKernel::CLBatchNormalizationLayerKernel() void 
CLBatchNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon, ActivationLayerInfo act_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, mean, var, beta, gamma, epsilon, act_info); +} + +void CLBatchNormalizationLayerKernel::configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, + const ICLTensor *gamma, + float epsilon, ActivationLayerInfo act_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, mean, var); @@ -169,7 +176,7 @@ void CLBatchNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor *out build_opts.add_option_if(gamma == nullptr, "-DUSE_DEFAULT_GAMMA"); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("batchnormalization_layer_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options())); + _kernel = create_kernel(compile_context, "batchnormalization_layer_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()); // Set kernel static arguments unsigned int include_output = (!_run_in_place) ? 1 : 0; diff --git a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp index 8046213488..fbdd04c424 100644 --- a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp @@ -84,6 +84,11 @@ CLBatchToSpaceLayerKernel::CLBatchToSpaceLayerKernel() } void CLBatchToSpaceLayerKernel::configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, block_shape, output); +} + +void CLBatchToSpaceLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), block_shape->info(), output->info())); @@ -99,7 +104,7 @@ void CLBatchToSpaceLayerKernel::configure(const ICLTensor *input, const ICLTenso build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); build_opts.add_option("-DBATCH_SIZE=" + support::cpp11::to_string(input->info()->dimension(3))); build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_width))); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("batch_to_space_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options())); + _kernel = create_kernel(compile_context, "batch_to_space_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()); // Configure kernel window Window win = calculate_max_window(*input->info(), Steps()); @@ -107,6 +112,11 @@ void CLBatchToSpaceLayerKernel::configure(const ICLTensor *input, const ICLTenso } void CLBatchToSpaceLayerKernel::configure(const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, block_shape_x, block_shape_y, output); +} + +void CLBatchToSpaceLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -127,7 +137,7 @@ void 
CLBatchToSpaceLayerKernel::configure(const ICLTensor *input, const int32_t build_opts.add_option("-DBLOCK_SHAPE_X=" + support::cpp11::to_string(block_shape_x)); build_opts.add_option("-DBLOCK_SHAPE_Y=" + support::cpp11::to_string(block_shape_y)); build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_width))); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("batch_to_space_static_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options())); + _kernel = create_kernel(compile_context, "batch_to_space_static_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()); // Configure kernel window Window win = calculate_max_window(*input->info(), Steps()); diff --git a/src/core/CL/kernels/CLBitwiseAndKernel.cpp b/src/core/CL/kernels/CLBitwiseAndKernel.cpp index 2d05f2e2bd..df23b90310 100644 --- a/src/core/CL/kernels/CLBitwiseAndKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseAndKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -38,6 +38,11 @@ CLBitwiseAndKernel::CLBitwiseAndKernel() { } void CLBitwiseAndKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output); +} + +void CLBitwiseAndKernel::configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8); @@ -48,7 +53,7 @@ void CLBitwiseAndKernel::configure(const ICLTensor *input1, const ICLTensor *inp _output = output; // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("bitwise_and")); + _kernel = create_kernel(compile_context, "bitwise_and"); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 16; diff --git a/src/core/CL/kernels/CLBitwiseNotKernel.cpp b/src/core/CL/kernels/CLBitwiseNotKernel.cpp index 0098e15ab6..2abfa46301 100644 --- a/src/core/CL/kernels/CLBitwiseNotKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseNotKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -32,6 +32,11 @@ using namespace arm_compute; void CLBitwiseNotKernel::configure(const ICLTensor *input, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output); +} + +void CLBitwiseNotKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); @@ -40,7 +45,7 @@ void CLBitwiseNotKernel::configure(const ICLTensor *input, ICLTensor *output) _output = output; // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("bitwise_not")); + _kernel = create_kernel(compile_context, "bitwise_not"); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 16; diff --git a/src/core/CL/kernels/CLBitwiseOrKernel.cpp b/src/core/CL/kernels/CLBitwiseOrKernel.cpp index b3efab8b1f..8ab509ae7f 100644 --- a/src/core/CL/kernels/CLBitwiseOrKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseOrKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. 
+ * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -39,6 +39,11 @@ CLBitwiseOrKernel::CLBitwiseOrKernel() } void CLBitwiseOrKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output); +} + +void CLBitwiseOrKernel::configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8); @@ -49,7 +54,7 @@ void CLBitwiseOrKernel::configure(const ICLTensor *input1, const ICLTensor *inpu _output = output; // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("bitwise_or")); + _kernel = create_kernel(compile_context, "bitwise_or"); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 16; diff --git a/src/core/CL/kernels/CLBitwiseXorKernel.cpp b/src/core/CL/kernels/CLBitwiseXorKernel.cpp index d8ac486d0f..c3ff7de820 100644 --- a/src/core/CL/kernels/CLBitwiseXorKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseXorKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -39,6 +39,11 @@ CLBitwiseXorKernel::CLBitwiseXorKernel() } void CLBitwiseXorKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output); +} + +void CLBitwiseXorKernel::configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8); @@ -49,7 +54,7 @@ void CLBitwiseXorKernel::configure(const ICLTensor *input1, const ICLTensor *inp _output = output; // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("bitwise_xor")); + _kernel = create_kernel(compile_context, "bitwise_xor"); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 16; diff --git a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp index bca23c7259..5ed5523632 100644 --- a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp +++ b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp @@ -89,6 +89,11 @@ CLBoundingBoxTransformKernel::CLBoundingBoxTransformKernel() } void CLBoundingBoxTransformKernel::configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info) +{ + configure(CLKernelLibrary::get().get_compile_context(), boxes, pred_boxes, deltas, info); +} + +void CLBoundingBoxTransformKernel::configure(CLCompileContext &compile_context, const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info) { ARM_COMPUTE_ERROR_ON_NULLPTR(boxes, pred_boxes, deltas); auto_init_if_empty(*pred_boxes->info(), deltas->info()->clone()->set_data_type(boxes->info()->data_type()).set_quantization_info(boxes->info()->quantization_info())); @@ -137,7 +142,7 @@ void CLBoundingBoxTransformKernel::configure(const ICLTensor *boxes, ICLTensor * // Create kernel const std::string kernel_name = (is_quantized) ? 
"bounding_box_transform_quantized" : "bounding_box_transform"; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Since the number of columns is a multiple of 4 by definition, we don't need to pad the tensor const unsigned int num_elems_processed_per_iteration = 4; diff --git a/src/core/CL/kernels/CLBox3x3Kernel.cpp b/src/core/CL/kernels/CLBox3x3Kernel.cpp index b81697f778..e0979a8aa3 100644 --- a/src/core/CL/kernels/CLBox3x3Kernel.cpp +++ b/src/core/CL/kernels/CLBox3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -41,6 +41,11 @@ BorderSize CLBox3x3Kernel::border_size() const } void CLBox3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined); +} + +void CLBox3x3Kernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); @@ -56,7 +61,7 @@ void CLBox3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, bool b }; // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("convolution3x3_static", build_opts)); + _kernel = create_kernel(compile_context, "convolution3x3_static", build_opts); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 8; diff --git a/src/core/CL/kernels/CLCannyEdgeKernel.cpp b/src/core/CL/kernels/CLCannyEdgeKernel.cpp index 08bfe90892..c1aa611566 100644 --- a/src/core/CL/kernels/CLCannyEdgeKernel.cpp +++ b/src/core/CL/kernels/CLCannyEdgeKernel.cpp @@ -39,6 +39,11 @@ CLGradientKernel::CLGradientKernel() } void CLGradientKernel::configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type) +{ + configure(CLKernelLibrary::get().get_compile_context(), gx, gy, magnitude, phase, norm_type); +} + +void CLGradientKernel::configure(CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gx, 1, DataType::S16, DataType::S32); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gy, 1, DataType::S16, DataType::S32); @@ -61,7 +66,7 @@ void CLGradientKernel::configure(const ICLTensor *gx, const ICLTensor *gy, ICLTe // Create kernel const std::string kernel_name = (norm_type == 1) ? 
std::string("combine_gradients_L1") : std::string("combine_gradients_L2"); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, built_opts)); + _kernel = create_kernel(compile_context, kernel_name, built_opts); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 4; @@ -119,6 +124,11 @@ BorderSize CLEdgeNonMaxSuppressionKernel::border_size() const } void CLEdgeNonMaxSuppressionKernel::configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined) +{ + configure(CLKernelLibrary::get().get_compile_context(), magnitude, phase, output, lower_thr, border_undefined); +} + +void CLEdgeNonMaxSuppressionKernel::configure(CLCompileContext &compile_context, const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(magnitude, 1, DataType::U16, DataType::U32); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(phase, 1, DataType::U8); @@ -135,7 +145,7 @@ void CLEdgeNonMaxSuppressionKernel::configure(const ICLTensor *magnitude, const // Create kernel const std::string kernel_name = std::string("suppress_non_maximum"); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, built_opts)); + _kernel = create_kernel(compile_context, kernel_name, built_opts); // Set minimum threshold argument unsigned int idx = 3 * num_arguments_per_2D_tensor(); //Skip the input and output parameters @@ -194,6 +204,12 @@ CLEdgeTraceKernel::CLEdgeTraceKernel() void CLEdgeTraceKernel::configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, upper_thr, lower_thr, visited, recorded, l1_stack, l1_stack_counter); +} + +void CLEdgeTraceKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, + ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U16, DataType::U32); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); @@ -218,7 +234,7 @@ void CLEdgeTraceKernel::configure(const ICLTensor *input, ICLTensor *output, int // Create kernel const std::string kernel_name = std::string("hysteresis"); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, built_opts)); + _kernel = create_kernel(compile_context, kernel_name, built_opts); // Set constant kernel args unsigned int width = _input->info()->dimension(0); diff --git a/src/core/CL/kernels/CLChannelCombineKernel.cpp b/src/core/CL/kernels/CLChannelCombineKernel.cpp index d029efe110..90face2ccc 100644 --- a/src/core/CL/kernels/CLChannelCombineKernel.cpp +++ b/src/core/CL/kernels/CLChannelCombineKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -52,6 +52,11 @@ CLChannelCombineKernel::CLChannelCombineKernel() } void CLChannelCombineKernel::configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), plane0, plane1, plane2, plane3, output); +} + +void CLChannelCombineKernel::configure(CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(plane0, plane1, plane2, output); ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane0); @@ -109,7 +114,7 @@ void CLChannelCombineKernel::configure(const ICLTensor *plane0, const ICLTensor // Create kernel std::string kernel_name = "channel_combine_" + string_from_format(output_format); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name)); + _kernel = create_kernel(compile_context, kernel_name); // Configure window Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); @@ -135,6 +140,11 @@ void CLChannelCombineKernel::configure(const ICLTensor *plane0, const ICLTensor } void CLChannelCombineKernel::configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), plane0, plane1, plane2, output); +} + +void CLChannelCombineKernel::configure(CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(plane0, plane1, plane2, output); ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane0); @@ -211,7 +221,7 @@ void CLChannelCombineKernel::configure(const ICLImage *plane0, const ICLImage *p } // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _kernel = create_kernel(compile_context, kernel_name, build_opts); // Configure window Window win = calculate_max_window(*plane0->info(), Steps(num_elems_processed_per_iteration)); diff --git a/src/core/CL/kernels/CLChannelExtractKernel.cpp b/src/core/CL/kernels/CLChannelExtractKernel.cpp index d2a0f984da..8df162c4ee 100644 --- a/src/core/CL/kernels/CLChannelExtractKernel.cpp +++ b/src/core/CL/kernels/CLChannelExtractKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. 
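Alongside the new overloads, kernel creation stops going through the CLKernelLibrary singleton and instead uses a free create_kernel() helper taking the compile context, the program name and the build options. The sketch below mirrors that call shape with simplified stand-in types; the real helper's declaration (in CLHelpers.h) is not reproduced here.

    #include <set>
    #include <string>

    // Stand-ins for the purpose of the sketch only.
    struct Kernel
    {
        std::string name;
    };

    struct CLCompileContext
    {
        // Hypothetical: build (or fetch from a cache) the program and return a kernel handle.
        Kernel create_kernel(const std::string &kernel_name, const std::set<std::string> &build_opts) const
        {
            (void)build_opts;
            return Kernel{kernel_name};
        }
    };

    // Free helper in the spirit of the one this patch switches to: the context,
    // not a process-wide singleton, decides where and how the kernel is compiled.
    inline Kernel create_kernel(const CLCompileContext &compile_context, const std::string &kernel_name,
                                const std::set<std::string> &build_opts = {})
    {
        return compile_context.create_kernel(kernel_name, build_opts);
    }

    int main()
    {
        CLCompileContext compile_context;
        Kernel k = create_kernel(compile_context, "bitwise_or");
        (void)k;
    }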
* * SPDX-License-Identifier: MIT * @@ -48,6 +48,11 @@ CLChannelExtractKernel::CLChannelExtractKernel() } void CLChannelExtractKernel::configure(const ICLTensor *input, Channel channel, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, channel, output); +} + +void CLChannelExtractKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, Channel channel, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_ON(input == output); @@ -89,7 +94,7 @@ void CLChannelExtractKernel::configure(const ICLTensor *input, Channel channel, // Create kernel std::string kernel_name = "channel_extract_" + string_from_format(format); std::set build_opts = { ("-DCHANNEL_" + string_from_channel(channel)) }; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _kernel = create_kernel(compile_context, kernel_name, build_opts); // Configure window Window win = calculate_max_window(*input->info(), Steps(_num_elems_processed_per_iteration)); @@ -105,6 +110,11 @@ void CLChannelExtractKernel::configure(const ICLTensor *input, Channel channel, } void CLChannelExtractKernel::configure(const ICLMultiImage *input, Channel channel, ICLImage *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, channel, output); +} + +void CLChannelExtractKernel::configure(CLCompileContext &compile_context, const ICLMultiImage *input, Channel channel, ICLImage *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output); @@ -151,7 +161,7 @@ void CLChannelExtractKernel::configure(const ICLMultiImage *input, Channel chann kernel_name = "channel_extract_" + string_from_format(format); build_opts.insert(("-DCHANNEL_" + string_from_channel(channel))); } - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _kernel = create_kernel(compile_context, kernel_name, build_opts); // Configure window Window win = calculate_max_window(*input_plane->info(), Steps(_num_elems_processed_per_iteration)); diff --git a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp index 27bf6b9034..5e6bbb395b 100644 --- a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp +++ b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp @@ -91,6 +91,11 @@ CLChannelShuffleLayerKernel::CLChannelShuffleLayerKernel() } void CLChannelShuffleLayerKernel::configure(const ICLTensor *input, ICLTensor *output, unsigned int num_groups) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, num_groups); +} + +void CLChannelShuffleLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int num_groups) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -116,7 +121,7 @@ void CLChannelShuffleLayerKernel::configure(const ICLTensor *input, ICLTensor *o // Create kernel std::string kernel_name = "channel_shuffle_" + lower_string(string_from_data_layout(data_layout)); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info()); diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp index 643c2aaa84..d96ec96126 100644 --- a/src/core/CL/kernels/CLCol2ImKernel.cpp +++ 
b/src/core/CL/kernels/CLCol2ImKernel.cpp @@ -90,6 +90,11 @@ CLCol2ImKernel::CLCol2ImKernel() } void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, convolved_dims, num_groups); +} + +void CLCol2ImKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -110,7 +115,7 @@ void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, const build_opts.add_option("-DWIDTH_OUTPUT=" + support::cpp11::to_string(_convolved_dims.width)); build_opts.add_option("-DNUM_GROUPS=" + support::cpp11::to_string(num_groups)); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("col2im", build_opts.options())); + _kernel = create_kernel(compile_context, "col2im", build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info(), _convolved_dims, num_groups); diff --git a/src/core/CL/kernels/CLColorConvertKernel.cpp b/src/core/CL/kernels/CLColorConvertKernel.cpp index f2eb848684..720d925427 100644 --- a/src/core/CL/kernels/CLColorConvertKernel.cpp +++ b/src/core/CL/kernels/CLColorConvertKernel.cpp @@ -47,6 +47,11 @@ CLColorConvertKernel::CLColorConvertKernel() } void CLColorConvertKernel::configure(const ICLTensor *input, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output); +} + +void CLColorConvertKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) { ARM_COMPUTE_ERROR_ON(input == nullptr); ARM_COMPUTE_ERROR_ON(output == nullptr); @@ -114,7 +119,7 @@ void CLColorConvertKernel::configure(const ICLTensor *input, ICLTensor *output) _output = output; // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name.str())); + _kernel = create_kernel(compile_context, kernel_name.str()); // Configure kernel window Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); @@ -138,6 +143,11 @@ void CLColorConvertKernel::configure(const ICLTensor *input, ICLTensor *output) } void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLImage *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output); +} + +void CLColorConvertKernel::configure(CLCompileContext &compile_context, const ICLMultiImage *input, ICLImage *output) { ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output); ARM_COMPUTE_ERROR_ON(output == nullptr); @@ -180,7 +190,7 @@ void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLImage *outpu _output = output; // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name.str())); + _kernel = create_kernel(compile_context, kernel_name.str()); // Configure kernel window const bool has_two_planes = (input->info()->format() == Format::NV12) || (input->info()->format() == Format::NV21); @@ -223,6 +233,11 @@ void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLImage *outpu } void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output); +} + +void CLColorConvertKernel::configure(CLCompileContext &compile_context, const ICLImage *input, ICLMultiImage *output) { ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); ARM_COMPUTE_ERROR_ON(output 
== nullptr); @@ -289,7 +304,7 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu _multi_output = output; // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name.str())); + _kernel = create_kernel(compile_context, kernel_name.str()); // Configure kernel window Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); @@ -330,6 +345,11 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu } void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLMultiImage *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output); +} + +void CLColorConvertKernel::configure(CLCompileContext &compile_context, const ICLMultiImage *input, ICLMultiImage *output) { unsigned int num_elems_processed_per_iteration = 0; switch(input->info()->format()) @@ -387,7 +407,7 @@ void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLMultiImage * float sub_sampling_input = (has_two_input_planars || (input->info()->format() == Format::IYUV)) ? 0.5f : 1; float sub_sampling_output = (has_two_output_planars || (output->info()->format() == Format::IYUV)) ? 0.5f : 1; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name.str())); + _kernel = create_kernel(compile_context, kernel_name.str()); Window win = calculate_max_window(*input->cl_plane(0)->info(), Steps(num_elems_processed_per_iteration)); win.set_dimension_step(Window::DimY, 2); diff --git a/src/core/CL/kernels/CLComparisonKernel.cpp b/src/core/CL/kernels/CLComparisonKernel.cpp index e010743bad..61aeebea5a 100644 --- a/src/core/CL/kernels/CLComparisonKernel.cpp +++ b/src/core/CL/kernels/CLComparisonKernel.cpp @@ -107,6 +107,11 @@ CLComparisonKernel::CLComparisonKernel() } void CLComparisonKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation) +{ + configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, operation); +} + +void CLComparisonKernel::configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation) { ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1->info(), *input2->info(), *output->info(), operation)); @@ -141,7 +146,7 @@ void CLComparisonKernel::configure(const ICLTensor *input1, const ICLTensor *inp } // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _kernel = create_kernel(compile_context, kernel_name, build_opts); ICLKernel::configure_internal(win_config.second); diff --git a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp index 016a7bbf4a..f57ff6c07e 100644 --- a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp +++ b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp @@ -40,6 +40,12 @@ CLConvertFullyConnectedWeightsKernel::CLConvertFullyConnectedWeightsKernel() void CLConvertFullyConnectedWeightsKernel::configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, original_input_shape, data_layout); +} + +void CLConvertFullyConnectedWeightsKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const TensorShape 
&original_input_shape, + DataLayout data_layout) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -70,7 +76,7 @@ void CLConvertFullyConnectedWeightsKernel::configure(const ICLTensor *input, ICL build_opts.add_option("-DFACTOR_2=" + support::cpp11::to_string(factor_2)); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("convert_fc_weights", build_opts.options())); + _kernel = create_kernel(compile_context, "convert_fc_weights", build_opts.options()); // Configure kernel window Window win = calculate_max_window(*input->info(), Steps()); diff --git a/src/core/CL/kernels/CLConvolutionKernel.cpp b/src/core/CL/kernels/CLConvolutionKernel.cpp index 2e1c56c3ba..3cc6d24de2 100644 --- a/src/core/CL/kernels/CLConvolutionKernel.cpp +++ b/src/core/CL/kernels/CLConvolutionKernel.cpp @@ -59,6 +59,12 @@ BorderSize CLConvolutionKernel::border_size() const template void CLConvolutionKernel::configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, conv, scale, border_undefined); +} + +template +void CLConvolutionKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16); @@ -92,7 +98,7 @@ void CLConvolutionKernel::configure(const ICLTensor *input, ICLTens out_type << "-DDATA_TYPE_OUT=" << get_cl_type_from_data_type(output->info()->data_type()); build_opts.add_option(out_type.str()); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name.str(), build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name.str(), build_opts.options()); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 8; @@ -129,6 +135,12 @@ BorderSize CLSeparableConvolutionHorKernel::border_size template void CLSeparableConvolutionHorKernel::configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, conv, border_undefined); +} + +template +void CLSeparableConvolutionHorKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U16, DataType::S16, DataType::S32); @@ -156,7 +168,7 @@ void CLSeparableConvolutionHorKernel::configure(const ICLTensor *in // Create kernel const std::string kernel_name = "convolution_separable1x" + support::cpp11::to_string(matrix_size) + "_static"; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _kernel = create_kernel(compile_context, kernel_name, build_opts); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 8; @@ -199,6 +211,13 @@ BorderSize CLSeparableConvolutionVertKernel::border_siz template void CLSeparableConvolutionVertKernel::configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, conv, scale, border_undefined, data_type); +} + +template +void 
CLSeparableConvolutionVertKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, + const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U16, DataType::S16, DataType::S32); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16); @@ -230,7 +249,7 @@ void CLSeparableConvolutionVertKernel::configure(const ICLTensor *i // Create kernel const std::string kernel_name = "convolution_separable" + support::cpp11::to_string(matrix_size) + "x1_static"; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _kernel = create_kernel(compile_context, kernel_name, build_opts); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 8; @@ -280,6 +299,12 @@ BorderSize CLConvolutionRectangleKernel::border_size() const } void CLConvolutionRectangleKernel::configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, conv, width, height, scale, border_undefined); +} + +void CLConvolutionRectangleKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, + bool border_undefined) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16); @@ -317,7 +342,7 @@ void CLConvolutionRectangleKernel::configure(const ICLTensor *input, ICLTensor * options.insert("-DMATRIX_WIDTH=" + support::cpp11::to_string(width)); options.insert("-DMATRIX_HEIGHT=" + support::cpp11::to_string(height)); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("convolution_rectangle", options)); + _kernel = create_kernel(compile_context, "convolution_rectangle", options); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 8; diff --git a/src/core/CL/kernels/CLCopyKernel.cpp b/src/core/CL/kernels/CLCopyKernel.cpp index 8a7353bcc2..e59223e511 100644 --- a/src/core/CL/kernels/CLCopyKernel.cpp +++ b/src/core/CL/kernels/CLCopyKernel.cpp @@ -156,6 +156,11 @@ CLCopyKernel::CLCopyKernel() } void CLCopyKernel::configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding, Window *output_window) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, padding, output_window); +} + +void CLCopyKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding, Window *output_window) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), padding, output_window)); @@ -199,7 +204,7 @@ void CLCopyKernel::configure(const ICLTensor *input, ICLTensor *output, const Pa } // Build kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("copy_tensor", build_opts.options())); + _kernel = create_kernel(compile_context, "copy_tensor", build_opts.options()); } else { @@ -217,7 +222,7 @@ void CLCopyKernel::configure(const ICLTensor *input, ICLTensor *output, const Pa } // Build kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("copy_pad_tensor", build_opts.options())); + _kernel = create_kernel(compile_context, "copy_pad_tensor", build_opts.options()); 
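The convolution kernels are class templates parameterised on the matrix size, and they receive the same pair of overloads, with the wrapper forwarding inside the template. A compact sketch with simplified stand-in types (not the CLConvolutionKernel declarations) is shown below.

    #include <cstdint>

    struct CLCompileContext
    {
    };

    struct CLKernelLibrary
    {
        static CLKernelLibrary &get()
        {
            static CLKernelLibrary lib;
            return lib;
        }
        CLCompileContext &get_compile_context()
        {
            return _ctx;
        }

    private:
        CLCompileContext _ctx;
    };

    struct ICLTensor
    {
    };

    template <unsigned int matrix_size>
    class ConvolutionKernelSketch
    {
    public:
        // Legacy overload forwards to the compile-context overload below.
        void configure(const ICLTensor *input, ICLTensor *output, const std::int16_t *conv, std::uint32_t scale, bool border_undefined)
        {
            configure(CLKernelLibrary::get().get_compile_context(), input, output, conv, scale, border_undefined);
        }

        void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::int16_t *conv, std::uint32_t scale, bool border_undefined)
        {
            // A real kernel would create its OpenCL kernel through compile_context here.
            (void)compile_context;
            (void)input;
            (void)output;
            (void)conv;
            (void)scale;
            (void)border_undefined;
        }
    };

    int main()
    {
        ICLTensor in, out;
        std::int16_t conv[3 * 3] = {};
        ConvolutionKernelSketch<3> kernel;
        kernel.configure(&in, &out, conv, 1, false);
    }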
// Configure window win_config = validate_and_configure_window_with_padding(input->info(), output->info(), padding); diff --git a/src/core/CL/kernels/CLCropKernel.cpp b/src/core/CL/kernels/CLCropKernel.cpp index e51861e9aa..2c17c99559 100644 --- a/src/core/CL/kernels/CLCropKernel.cpp +++ b/src/core/CL/kernels/CLCropKernel.cpp @@ -48,6 +48,12 @@ CLCropKernel::CLCropKernel() } void CLCropKernel::configure(const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value, Window *output_window) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, start, end, batch_index, extrapolation_value, output_window); +} + +void CLCropKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value, + Window *output_window) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), start, end, batch_index, extrapolation_value, output_window)); @@ -86,7 +92,7 @@ void CLCropKernel::configure(const ICLTensor *input, ICLTensor *output, Coordina build_opts.add_option_if(multi_access_x && remainder_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max(output_width_x - vec_size_x, 0))); build_opts.add_option_if(start.x > end.x, "-DWIDTH_FLIPPED="); build_opts.add_option_if(start.y > end.y, "-DHEIGHT_FLIPPED="); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("crop_tensor", build_opts.options())); + _kernel = create_kernel(compile_context, "crop_tensor", build_opts.options()); } Status CLCropKernel::validate(const ITensorInfo *input, const ITensorInfo *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value, Window *output_window) diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp index ee392032ca..f92f7da37f 100644 --- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp +++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp @@ -69,6 +69,12 @@ Status CLDeconvolutionLayerUpsampleKernel::validate(const ITensorInfo *input, co void CLDeconvolutionLayerUpsampleKernel::configure(const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, info); +} + +void CLDeconvolutionLayerUpsampleKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, + const PadStrideInfo &info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -83,7 +89,7 @@ void CLDeconvolutionLayerUpsampleKernel::configure(const ICLTensor *input, ICLTe // Create kernel CLBuildOptions build_opts; build_opts.add_option(("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->info()->element_size()))); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("deconvolution_upsample", build_opts.options())); + _kernel = create_kernel(compile_context, "deconvolution_upsample", build_opts.options()); constexpr unsigned int num_elems_processed_per_iteration = 1; diff --git a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp index 2704c74ff9..68607e9fc6 100644 --- a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp +++ b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp @@ -115,6 +115,13 @@ 
CLDeconvolutionReshapeOutputKernel::CLDeconvolutionReshapeOutputKernel() void CLDeconvolutionReshapeOutputKernel::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, input_info, weights_info, deconv_info); +} + +void CLDeconvolutionReshapeOutputKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, + const ITensorInfo *weights_info, + const PadStrideInfo &deconv_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, input_info, weights_info); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (bias != nullptr ? bias->info() : nullptr), output->info(), input_info, weights_info, deconv_info)); @@ -148,7 +155,7 @@ void CLDeconvolutionReshapeOutputKernel::configure(const ICLTensor *input, const build_opts.add_option_if(data_layout == DataLayout::NCHW, "-DNUM_FILTERS=" + support::cpp11::to_string(filter_b)); build_opts.add_option_if(_add_bias, "-DADD_BIAS"); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("deconvolution_reshape", build_opts.options())); + _kernel = create_kernel(compile_context, "deconvolution_reshape", build_opts.options()); ICLKernel::configure_internal(win_config.second); // Set config_id for enabling LWS tuning diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp index 99489036b4..241adb297b 100644 --- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp @@ -83,6 +83,11 @@ CLDepthConcatenateLayerKernel::CLDepthConcatenateLayerKernel() } void CLDepthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, depth_offset, output); +} + +void CLDepthConcatenateLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int depth_offset, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), depth_offset, output->info())); @@ -109,7 +114,7 @@ void CLDepthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned i } // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("concatenate", build_opts.options())); + _kernel = create_kernel(compile_context, "concatenate", build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window(input->info(), depth_offset, output->info()); diff --git a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp index 13687a540d..2e29dbf92a 100644 --- a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp @@ -73,6 +73,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, C } // namespace void CLDepthConvertLayerKernel::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, policy, shift); +} + +void CLDepthConvertLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, 
output); @@ -100,14 +105,14 @@ void CLDepthConvertLayerKernel::configure(const ICLTensor *input, ICLTensor *out // Create kernel const std::string kernel_name = (input_size >= output_size) ? "convert_depth_down" : "convert_depth_up"; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Set shift arg unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the input and output parameters _kernel.setArg(idx++, shift); // Configure kernel - ICLSimple3DKernel::configure(input, output, num_elems_processed_per_iteration); + ICLSimple2DKernel::configure(input, output, num_elems_processed_per_iteration); // Collapse window const Window &full_window = window(); diff --git a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp index ad0d398c88..cd61a91ec5 100644 --- a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp @@ -66,6 +66,11 @@ CLDepthToSpaceLayerKernel::CLDepthToSpaceLayerKernel() } void CLDepthToSpaceLayerKernel::configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, block_shape); +} + +void CLDepthToSpaceLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -87,7 +92,7 @@ void CLDepthToSpaceLayerKernel::configure(const ICLTensor *input, ICLTensor *out build_opts.add_option("-DCHANNEL_SIZE=" + support::cpp11::to_string(input->info()->dimension(idx_channel))); build_opts.add_option("-DBLOCK_SHAPE=" + support::cpp11::to_string(block_shape)); build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_width))); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("depth_to_space_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options())); + _kernel = create_kernel(compile_context, "depth_to_space_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()); // Configure kernel window Window win = calculate_max_window(*input->info(), Steps()); diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp index a9875675c5..e293fa264f 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp @@ -245,6 +245,13 @@ BorderSize CLDepthwiseConvolutionLayer3x3NCHWKernel::border_size() const void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation, const ICLTensor *output_multipliers, const ICLTensor *output_shifts) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation, output_multipliers, output_shifts); +} + +void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, + const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info, 
const Size2D &dilation, + const ICLTensor *output_multipliers, const ICLTensor *output_shifts) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), @@ -337,7 +344,7 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, build_opts.add_option_if(input->info()->data_type() == DataType::F16, "-DIS_F16"); build_opts.add_option_if(input->info()->data_type() == DataType::F32, "-DIS_F32"); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Set config_id for enabling LWS tuning _config_id = kernel_name; diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp index 4afca2b31f..71af63a97f 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp @@ -198,6 +198,13 @@ BorderSize CLDepthwiseConvolutionLayer3x3NHWCKernel::border_size() const void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation, const ICLTensor *output_multipliers, const ICLTensor *output_shifts) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation, output_multipliers, output_shifts); +} + +void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, + const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation, + const ICLTensor *output_multipliers, const ICLTensor *output_shifts) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? 
biases->info() : nullptr, output->info(), @@ -328,7 +335,7 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, build_opts.add_option_if(input->info()->data_type() == DataType::F32, "-DIS_F32"); ICLKernel::configure_internal(win_config.second); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Set config_id for enabling LWS tuning _config_id = kernel_name; diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp index b4025ffdaf..45df9ed59a 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp @@ -195,6 +195,14 @@ CLDepthwiseConvolutionLayerNativeKernel::CLDepthwiseConvolutionLayerNativeKernel void CLDepthwiseConvolutionLayerNativeKernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info, const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const ICLTensor *output_multipliers, const ICLTensor *output_shifts) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, dwc_weights_info, dwc_info, conv_info, depth_multiplier, dilation, output_multipliers, output_shifts); +} + +void CLDepthwiseConvolutionLayerNativeKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, + const DWCWeightsKernelInfo &dwc_weights_info, + const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, + const ICLTensor *output_multipliers, const ICLTensor *output_shifts) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? 
biases->info() : nullptr, output->info(), @@ -284,7 +292,7 @@ void CLDepthwiseConvolutionLayerNativeKernel::configure(const ICLTensor *input, } ICLKernel::configure_internal(win_config.second); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Set config_id for enabling LWS tuning _config_id = kernel_name; diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp index f746f1feff..7e38e77107 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp @@ -87,6 +87,11 @@ CLDepthwiseConvolutionLayerReshapeWeightsKernel::CLDepthwiseConvolutionLayerResh } void CLDepthwiseConvolutionLayerReshapeWeightsKernel::configure(const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, info); +} + +void CLDepthwiseConvolutionLayerReshapeWeightsKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), info)); @@ -105,7 +110,7 @@ void CLDepthwiseConvolutionLayerReshapeWeightsKernel::configure(const ICLTensor build_opts.add_option_if(info.transpose, "-DTRANSPOSE"); build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->info()->element_size())); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("depthwise_convolution_reshape_weights", build_opts.options())); + _kernel = create_kernel(compile_context, "depthwise_convolution_reshape_weights", build_opts.options()); } Status CLDepthwiseConvolutionLayerReshapeWeightsKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const DepthwiseConvolutionReshapeInfo &info) diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp index 5d9a8faa54..ae7489f0a8 100644 --- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp @@ -76,6 +76,11 @@ CLDequantizationLayerKernel::CLDequantizationLayerKernel() } void CLDequantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output); +} + +void CLDequantizationLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); @@ -120,7 +125,7 @@ void CLDequantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *o build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max(output_width_x - vec_size_x, 0))); // Create kernel name - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); } Status CLDequantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output) diff --git a/src/core/CL/kernels/CLDerivativeKernel.cpp b/src/core/CL/kernels/CLDerivativeKernel.cpp index bfda041104..670cde308c 
100644 --- a/src/core/CL/kernels/CLDerivativeKernel.cpp +++ b/src/core/CL/kernels/CLDerivativeKernel.cpp @@ -49,6 +49,11 @@ BorderSize CLDerivativeKernel::border_size() const } void CLDerivativeKernel::configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_undefined); +} + +void CLDerivativeKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); @@ -85,7 +90,7 @@ void CLDerivativeKernel::configure(const ICLTensor *input, ICLTensor *output_x, // Create kernel const std::string kernel_name = std::string("derivative"); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _kernel = create_kernel(compile_context, kernel_name, build_opts); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 16; diff --git a/src/core/CL/kernels/CLDilateKernel.cpp b/src/core/CL/kernels/CLDilateKernel.cpp index 89853d7b19..0f6879dcc8 100644 --- a/src/core/CL/kernels/CLDilateKernel.cpp +++ b/src/core/CL/kernels/CLDilateKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -37,12 +37,17 @@ BorderSize CLDilateKernel::border_size() const } void CLDilateKernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined); +} + +void CLDilateKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("dilate")); + _kernel = create_kernel(compile_context, "dilate"); _input = input; _output = output; diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp index d1dfcd9bb1..ff3d106f46 100644 --- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp +++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp @@ -422,6 +422,12 @@ BorderSize CLDirectConvolutionLayerKernel::border_size() const } void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info); +} + +void CLDirectConvolutionLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, + const PadStrideInfo &conv_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); @@ -491,7 +497,7 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL build_options.add_option(std::string("-DWEIGHTS_DEPTH=" + support::cpp11::to_string(_weights->info()->dimension(channel_idx)))); kernel_name << "_f32_bifrost"; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name.str(), build_options.options())); + _kernel = create_kernel(compile_context, 
kernel_name.str(), build_options.options()); } else { @@ -535,7 +541,7 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL build_options.add_option("-DKERNEL_SIZE=" + support::cpp11::to_string(kernel_size)); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("direct_convolution_quantized", build_options.options())); + _kernel = create_kernel(compile_context, "direct_convolution_quantized", build_options.options()); // Set static kernel arguments unsigned int idx = 3 * num_arguments_per_3D_tensor() + ((_biases != nullptr) ? num_arguments_per_1D_tensor() : 0) + 1; @@ -546,7 +552,7 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL else { // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name.str(), build_options.options())); + _kernel = create_kernel(compile_context, kernel_name.str(), build_options.options()); } } diff --git a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp index a1aada4990..7356c5a5cd 100644 --- a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp +++ b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp @@ -51,6 +51,11 @@ Status validate_arguments(const ITensorInfo &input, const ITensorInfo &output) } // namespace void CLElementWiseUnaryLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const ElementWiseUnary &op) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, op); +} + +void CLElementWiseUnaryLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ElementWiseUnary &op) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input->info(), *output->info())); @@ -105,7 +110,7 @@ void CLElementWiseUnaryLayerKernel::configure(const ICLTensor *input, ICLTensor } // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); } Status CLElementWiseUnaryLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ElementWiseUnary &op) diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp index 0f2e26f186..ee4ef40b87 100644 --- a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp +++ b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp @@ -236,6 +236,11 @@ CLElementwiseOperationKernel::CLElementwiseOperationKernel() } void CLElementwiseOperationKernel::configure_common(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output) +{ + configure_common(CLKernelLibrary::get().get_compile_context(), input1, input2, output); +} + +void CLElementwiseOperationKernel::configure_common(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1->info(), *input2->info(), *output->info())); @@ -264,7 +269,7 @@ void CLElementwiseOperationKernel::configure_common(const ICLTensor *input1, con } // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); ICLKernel::configure_internal(win_config.second); @@ -328,11 +333,18 @@ BorderSize 
CLElementwiseOperationKernel::border_size() const void CLSaturatedArithmeticOperationKernel::configure(ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ConvertPolicy &policy, const ActivationLayerInfo &act_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), op, input1, input2, output, policy, act_info); +} + +void CLSaturatedArithmeticOperationKernel::configure(CLCompileContext &compile_context, ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, + const ConvertPolicy &policy, + const ActivationLayerInfo &act_info) { _policy = policy; _op = op; _act_info = act_info; - configure_common(input1, input2, output); + configure_common(compile_context, input1, input2, output); } Status CLSaturatedArithmeticOperationKernel::validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ConvertPolicy &policy, @@ -380,10 +392,16 @@ std::string CLSaturatedArithmeticOperationKernel::name() /** Arithmetic operations*/ void CLArithmeticOperationKernel::configure(ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), op, input1, input2, output, act_info); +} + +void CLArithmeticOperationKernel::configure(CLCompileContext &compile_context, ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, + const ActivationLayerInfo &act_info) { _op = op; _act_info = act_info; - configure_common(input1, input2, output); + configure_common(compile_context, input1, input2, output); } Status CLArithmeticOperationKernel::validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info) diff --git a/src/core/CL/kernels/CLErodeKernel.cpp b/src/core/CL/kernels/CLErodeKernel.cpp index e56b71a75e..e959d1c320 100644 --- a/src/core/CL/kernels/CLErodeKernel.cpp +++ b/src/core/CL/kernels/CLErodeKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. 
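For the elementwise kernels the context is threaded one level deeper: the public configure() overloads set the operation-specific state and then hand the compile context to the shared configure_common(), where the OpenCL kernel is actually built. The sketch below shows that call chain with simplified stand-ins, not the arm_compute class hierarchy.

    #include <string>

    struct CLCompileContext
    {
        std::string id{"ctx"};
    };

    struct CLKernelLibrary
    {
        static CLKernelLibrary &get()
        {
            static CLKernelLibrary lib;
            return lib;
        }
        CLCompileContext &get_compile_context()
        {
            return _ctx;
        }

    private:
        CLCompileContext _ctx;
    };

    struct ICLTensor
    {
    };

    class ElementwiseKernelSketch
    {
    protected:
        // Shared body: everything that builds the OpenCL kernel receives the context.
        void configure_common(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
        {
            (void)compile_context;
            (void)input1;
            (void)input2;
            (void)output;
        }
    };

    class ArithmeticKernelSketch : public ElementwiseKernelSketch
    {
    public:
        void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
        {
            configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output);
        }

        void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
        {
            // Operation-specific state would be set here before delegating.
            configure_common(compile_context, input1, input2, output);
        }
    };

    int main()
    {
        ICLTensor a, b, c;
        ArithmeticKernelSketch kernel;
        kernel.configure(&a, &b, &c);
    }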
* * SPDX-License-Identifier: MIT * @@ -37,12 +37,17 @@ BorderSize CLErodeKernel::border_size() const } void CLErodeKernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined); +} + +void CLErodeKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("erode")); + _kernel = create_kernel(compile_context, "erode"); _input = input; _output = output; diff --git a/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp b/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp index 822f155b85..5542ad72f4 100644 --- a/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp +++ b/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp @@ -74,6 +74,11 @@ CLFFTDigitReverseKernel::CLFFTDigitReverseKernel() } void CLFFTDigitReverseKernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, idx, config); +} + +void CLFFTDigitReverseKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, idx); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), idx->info(), config)); @@ -87,7 +92,7 @@ void CLFFTDigitReverseKernel::configure(const ICLTensor *input, ICLTensor *outpu build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(input->info()->num_channels())); build_opts.add_option_if(config.conjugate, "-DCONJ"); std::string kernel_name = "fft_digit_reverse_axis_" + support::cpp11::to_string(config.axis); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info(), idx->info(), config); diff --git a/src/core/CL/kernels/CLFFTRadixStageKernel.cpp b/src/core/CL/kernels/CLFFTRadixStageKernel.cpp index 73df5b226c..6e7e1eff06 100644 --- a/src/core/CL/kernels/CLFFTRadixStageKernel.cpp +++ b/src/core/CL/kernels/CLFFTRadixStageKernel.cpp @@ -84,6 +84,11 @@ CLFFTRadixStageKernel::CLFFTRadixStageKernel() } void CLFFTRadixStageKernel::configure(ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, config); +} + +void CLFFTRadixStageKernel::configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config) { ARM_COMPUTE_ERROR_ON_NULLPTR(input); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr, config)); @@ -101,7 +106,7 @@ void CLFFTRadixStageKernel::configure(ICLTensor *input, ICLTensor *output, const kernel_name += "_radix_" + support::cpp11::to_string(config.radix); kernel_name += (config.is_first_stage) ? 
"_first_stage" : ""; kernel_name += "_axis_" + support::cpp11::to_string(config.axis); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Set static arguments if not the first stage if(!config.is_first_stage) diff --git a/src/core/CL/kernels/CLFFTScaleKernel.cpp b/src/core/CL/kernels/CLFFTScaleKernel.cpp index 312c73746d..32e652ad1d 100644 --- a/src/core/CL/kernels/CLFFTScaleKernel.cpp +++ b/src/core/CL/kernels/CLFFTScaleKernel.cpp @@ -77,6 +77,11 @@ CLFFTScaleKernel::CLFFTScaleKernel() } void CLFFTScaleKernel::configure(ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, config); +} + +void CLFFTScaleKernel::configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config) { ARM_COMPUTE_ERROR_ON_NULLPTR(input); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr)); @@ -91,7 +96,7 @@ void CLFFTScaleKernel::configure(ICLTensor *input, ICLTensor *output, const FFTS build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(output != nullptr ? output->info()->num_channels() : input->info()->num_channels())); build_opts.add_option_if(config.conjugate, "-DCONJ"); std::string kernel_name = "fft_scale_conj"; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Set static arguments unsigned int idx = (1 + (_run_in_place ? 0 : 1)) * num_arguments_per_3D_tensor(); // Skip the input and output parameters diff --git a/src/core/CL/kernels/CLFastCornersKernel.cpp b/src/core/CL/kernels/CLFastCornersKernel.cpp index 79ab3b7197..2bd4d89bbc 100644 --- a/src/core/CL/kernels/CLFastCornersKernel.cpp +++ b/src/core/CL/kernels/CLFastCornersKernel.cpp @@ -49,6 +49,11 @@ BorderSize CLFastCornersKernel::border_size() const } void CLFastCornersKernel::configure(const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, threshold, non_max_suppression, border_mode); +} + +void CLFastCornersKernel::configure(CLCompileContext &compile_context, const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode) { ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output); @@ -69,7 +74,7 @@ void CLFastCornersKernel::configure(const ICLImage *input, ICLImage *output, flo // Create kernel const std::string kernel_name = std::string("fast_corners"); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _kernel = create_kernel(compile_context, kernel_name, build_opts); // Set static kernel arguments unsigned int idx = 2 * num_arguments_per_2D_tensor(); // Skip the input and output parameters @@ -132,6 +137,11 @@ CLCopyToArrayKernel::CLCopyToArrayKernel() } void CLCopyToArrayKernel::configure(const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, update_number, corners, num_buffers); +} + +void CLCopyToArrayKernel::configure(CLCompileContext &compile_context, const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer 
*num_buffers) { ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); @@ -151,7 +161,7 @@ void CLCopyToArrayKernel::configure(const ICLImage *input, bool update_number, I // Create kernel const std::string kernel_name = std::string("copy_to_keypoint"); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _kernel = create_kernel(compile_context, kernel_name, build_opts); //Get how many pixels skipped in the x dimension in the previous stages unsigned int offset = _input->info()->valid_region().anchor.x(); diff --git a/src/core/CL/kernels/CLFillBorderKernel.cpp b/src/core/CL/kernels/CLFillBorderKernel.cpp index c7a83fc2a4..c69a8c9f92 100644 --- a/src/core/CL/kernels/CLFillBorderKernel.cpp +++ b/src/core/CL/kernels/CLFillBorderKernel.cpp @@ -61,6 +61,11 @@ void CLFillBorderKernel::set_constant_border(unsigned int idx, const PixelValue } void CLFillBorderKernel::configure(ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value) +{ + configure(CLKernelLibrary::get().get_compile_context(), tensor, border_size, border_mode, constant_border_value); +} + +void CLFillBorderKernel::configure(CLCompileContext &compile_context, ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value) { ARM_COMPUTE_ERROR_ON(tensor == nullptr); ARM_COMPUTE_ERROR_ON(tensor->info()->num_channels() != 1); @@ -87,7 +92,7 @@ void CLFillBorderKernel::configure(ICLTensor *tensor, BorderSize border_size, Bo build_opts.add_option("-DBORDER_SIZE_RIGHT=" + support::cpp11::to_string(border_size.right)); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); _tensor = tensor; // Create static kernel arguments diff --git a/src/core/CL/kernels/CLFlattenLayerKernel.cpp b/src/core/CL/kernels/CLFlattenLayerKernel.cpp index c078b0d14a..c2dc933f5a 100644 --- a/src/core/CL/kernels/CLFlattenLayerKernel.cpp +++ b/src/core/CL/kernels/CLFlattenLayerKernel.cpp @@ -81,6 +81,11 @@ CLFlattenLayerKernel::CLFlattenLayerKernel() } void CLFlattenLayerKernel::configure(const ICLTensor *input, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output); +} + +void CLFlattenLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); @@ -101,7 +106,7 @@ void CLFlattenLayerKernel::configure(const ICLTensor *input, ICLTensor *output) build_opts.add_option_if(output->info()->num_dimensions() > 2, "-DDST_DIM1=" + support::cpp11::to_string(output->info()->dimension(1))); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("flatten", build_opts.options())); + _kernel = create_kernel(compile_context, "flatten", build_opts.options()); // Set config_id for enabling LWS tuning _config_id = "flatten"; diff --git a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp index 1e1c3e0eef..6f4ba0ed06 100644 --- a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp +++ b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp @@ -108,6 +108,14 @@ void CLFuseBatchNormalizationKernel::configure(const ICLTensor *input_weights, c ICLTensor *fused_weights, 
ICLTensor *fused_bias, const ICLTensor *input_bias, const ICLTensor *bn_beta, const ICLTensor *bn_gamma, float epsilon, FuseBatchNormalizationType fbn_type) +{ + configure(CLKernelLibrary::get().get_compile_context(), input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type); +} + +void CLFuseBatchNormalizationKernel::configure(CLCompileContext &compile_context, const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, + ICLTensor *fused_weights, ICLTensor *fused_bias, + const ICLTensor *input_bias, const ICLTensor *bn_beta, const ICLTensor *bn_gamma, + float epsilon, FuseBatchNormalizationType fbn_type) { ARM_COMPUTE_ERROR_ON_NULLPTR(input_weights, bn_mean, bn_var); @@ -162,7 +170,7 @@ void CLFuseBatchNormalizationKernel::configure(const ICLTensor *input_weights, c build_opts.add_option_if(bn_gamma != nullptr, "-DGAMMA"); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("fuse_batchnormalization_layer", build_opts.options())); + _kernel = create_kernel(compile_context, "fuse_batchnormalization_layer", build_opts.options()); } Status CLFuseBatchNormalizationKernel::validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var, diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp index d13884f267..0d4bbba0d4 100644 --- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp @@ -171,6 +171,11 @@ CLGEMMLowpMatrixMultiplyKernel::CLGEMMLowpMatrixMultiplyKernel() } void CLGEMMLowpMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMReshapeInfo &gemm_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input0, input1, output, gemm_info); +} + +void CLGEMMLowpMatrixMultiplyKernel::configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMReshapeInfo &gemm_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output); @@ -218,7 +223,7 @@ void CLGEMMLowpMatrixMultiplyKernel::configure(const ICLTensor *input0, const IC kernel_name = "gemmlowp_mm_midgard"; // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Set config_id for enabling LWS tuning _config_id = kernel_name; diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp index 82ba824848..bcf71565af 100644 --- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp @@ -172,6 +172,13 @@ CLGEMMLowpMatrixMultiplyNativeKernel::CLGEMMLowpMatrixMultiplyNativeKernel() void CLGEMMLowpMatrixMultiplyNativeKernel::configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input0, input1, output, lhs_info, rhs_info, gemm_info); +} + +void CLGEMMLowpMatrixMultiplyNativeKernel::configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMReshapeInfo 
&gemm_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output); @@ -223,7 +230,7 @@ void CLGEMMLowpMatrixMultiplyNativeKernel::configure(const ICLTensor *input0, co std::string kernel_name("gemmlowp_mm_native"); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Set config_id for enabling LWS tuning _config_id = kernel_name; diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp index dd1f221a1f..ebb00a45d5 100644 --- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp @@ -167,6 +167,13 @@ CLGEMMLowpMatrixMultiplyReshapedKernel::CLGEMMLowpMatrixMultiplyReshapedKernel() void CLGEMMLowpMatrixMultiplyReshapedKernel::configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input0, input1, output, lhs_info, rhs_info, gemm_info); +} + +void CLGEMMLowpMatrixMultiplyReshapedKernel::configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMReshapeInfo &gemm_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output); @@ -214,7 +221,7 @@ void CLGEMMLowpMatrixMultiplyReshapedKernel::configure(const ICLTensor *input0, kernel_name += rhs_info.transpose ? "rhs_t" : "rhs_nt"; // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Set config_id for enabling LWS tuning _config_id = kernel_name; diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp index cef5b34de0..dd4c55c2d8 100644 --- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp @@ -309,6 +309,13 @@ CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::CLGEMMLowpMatrixMultiplyReshapedO void CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, const ICLTensor *output_multipliers, const ICLTensor *output_shifts) +{ + configure(CLKernelLibrary::get().get_compile_context(), input0, input1, output, gemm_info, vector_sum_col, vector_sum_row, bias, output_multipliers, output_shifts); +} + +void CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info, + const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, + const ICLTensor *output_multipliers, const ICLTensor *output_shifts) { ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(), @@ -420,7 +427,7 @@ void CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::configure(const ICLTensor *i } // Create kernel - _kernel = 
static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Set config_id for enabling LWS tuning _config_id = kernel_name; diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp index 49905fe3e0..fd2cc7a680 100644 --- a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp @@ -144,6 +144,13 @@ CLGEMMLowpOffsetContributionKernel::CLGEMMLowpOffsetContributionKernel() void CLGEMMLowpOffsetContributionKernel::configure(ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset, int32_t b_offset) +{ + configure(CLKernelLibrary::get().get_compile_context(), mm_result, vector_sum_col, vector_sum_row, bias, k, a_offset, b_offset); +} + +void CLGEMMLowpOffsetContributionKernel::configure(CLCompileContext &compile_context, ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, + int32_t k, int32_t a_offset, + int32_t b_offset) { // Perform validate step ARM_COMPUTE_ERROR_ON_NULLPTR(mm_result); @@ -182,7 +189,7 @@ void CLGEMMLowpOffsetContributionKernel::configure(ICLTensor *mm_result, const I std::string kernel_name("gemmlowp_offset_contribution"); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window(mm_result->info(), diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp index bae0a134ea..d52fb21574 100644 --- a/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp @@ -183,6 +183,14 @@ CLGEMMLowpOffsetContributionOutputStageKernel::CLGEMMLowpOffsetContributionOutpu void CLGEMMLowpOffsetContributionOutputStageKernel::configure(const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output, int32_t k, int32_t a_offset, int32_t b_offset, const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts) +{ + configure(CLKernelLibrary::get().get_compile_context(), mm_result, vector_sum_col, vector_sum_row, bias, output, k, a_offset, b_offset, output_stage, output_multipliers, output_shifts); +} + +void CLGEMMLowpOffsetContributionOutputStageKernel::configure(CLCompileContext &compile_context, const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, + const ICLTensor *bias, ICLTensor *output, + int32_t k, int32_t a_offset, int32_t b_offset, const GEMMLowpOutputStageInfo &output_stage, + const ICLTensor *output_multipliers, const ICLTensor *output_shifts) { // Perform validate step ARM_COMPUTE_ERROR_ON_NULLPTR(mm_result, output, output_multipliers, output_shifts); @@ -242,7 +250,7 @@ void CLGEMMLowpOffsetContributionOutputStageKernel::configure(const ICLTensor *m kernel_name += "_" + string_from_gemmlowp_output_stage(output_stage.type); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = 
create_kernel(compile_context, kernel_name, build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window(mm_result->info(), diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp index 5a554f3111..171dc48112 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp @@ -116,6 +116,12 @@ Status CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel::validate(const ITensorInfo void CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, info); +} + +void CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, + const GEMMLowpOutputStageInfo *info) { // Perform validate step ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -138,7 +144,7 @@ void CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel::configure(const ICLTensor *i build_opts.add_option_if(bias != nullptr, "-DADD_BIAS"); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("gemmlowp_output_stage_quantize_down_float", build_opts.options())); + _kernel = create_kernel(compile_context, "gemmlowp_output_stage_quantize_down_float", build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window(input->info(), (bias != nullptr) ? bias->info() : nullptr, output->info(), info->output_data_type); diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp index 002af6b471..ca85e8b655 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp @@ -106,6 +106,11 @@ Status CLGEMMLowpQuantizeDownInt32ScaleKernel::validate(const ITensorInfo *input } void CLGEMMLowpQuantizeDownInt32ScaleKernel::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, output_stage); +} + +void CLGEMMLowpQuantizeDownInt32ScaleKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage) { // Perform validate step ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -135,7 +140,7 @@ void CLGEMMLowpQuantizeDownInt32ScaleKernel::configure(const ICLTensor *input, c build_opts.add_option_if(bias != nullptr, "-DADD_BIAS"); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("gemmlowp_output_stage_quantize_down", build_opts.options())); + _kernel = create_kernel(compile_context, "gemmlowp_output_stage_quantize_down", build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window(input->info(), (bias != nullptr) ? 
bias->info() : nullptr, output->info(), output_stage->output_data_type); diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp index 22300b952d..00cef56db7 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp @@ -122,6 +122,13 @@ Status CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::validate(const void CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min, int max) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, result_fixedpoint_multiplier, result_shift, min, max); +} + +void CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, + int result_fixedpoint_multiplier, int result_shift, + int min, int max) { // Perform validate step ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -141,7 +148,7 @@ void CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::configure(const build_opts.add_option_if(bias != nullptr, "-DADD_BIAS"); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16", build_opts.options())); + _kernel = create_kernel(compile_context, "gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16", build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window(input->info(), (bias != nullptr) ? 
bias->info() : nullptr, output->info()); diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp index 9e0570a5e1..b6d98e6749 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp @@ -117,6 +117,13 @@ Status CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::validate(const void CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min, int max) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max); +} + +void CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, + int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, + int min, int max) { // Perform validate step ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -140,7 +147,7 @@ void CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::configure(const I build_opts.add_option_if(bias != nullptr, "-DADD_BIAS"); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("gemmlowp_output_stage_quantize_down_fixedpoint", build_opts.options())); + _kernel = create_kernel(compile_context, "gemmlowp_output_stage_quantize_down_fixedpoint", build_opts.options()); ICLKernel::configure_internal(win_config.second); } diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp index 6ab3272bd7..7f2f2e75a9 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp @@ -117,6 +117,13 @@ Status CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::validate(const void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min, int max) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max); +} + +void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, + int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, + int min, int max) { // Perform validate step ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -140,7 +147,7 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::configure(const build_opts.add_option_if(bias != nullptr, "-DADD_BIAS"); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("gemmlowp_output_stage_quantize_down_fixedpoint", build_opts.options())); + _kernel = create_kernel(compile_context, "gemmlowp_output_stage_quantize_down_fixedpoint", build_opts.options()); ICLKernel::configure_internal(win_config.second); } diff --git 
a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp index 832e6281f4..e81ab2ffba 100644 --- a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp @@ -84,6 +84,11 @@ ICLGEMMLowpReductionKernel::ICLGEMMLowpReductionKernel() } void CLGEMMLowpMatrixAReductionKernel::configure(const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) +{ + configure(CLKernelLibrary::get().get_compile_context(), mtx_a, vector_sum_row, info); +} + +void CLGEMMLowpMatrixAReductionKernel::configure(CLCompileContext &compile_context, const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) { // Perform validate step ARM_COMPUTE_ERROR_ON_NULLPTR(mtx_a, vector_sum_row); @@ -104,7 +109,7 @@ void CLGEMMLowpMatrixAReductionKernel::configure(const ICLTensor *mtx_a, ICLTens std::string kernel_name = "gemmlowp_matrix_a_reduction" + std::string(is_dot8_supported ? "_dot8" : ""); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Configure kernel window // This kernel does not need padding @@ -153,6 +158,11 @@ void CLGEMMLowpMatrixAReductionKernel::run(const Window &window, cl::CommandQueu } void CLGEMMLowpMatrixBReductionKernel::configure(const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) +{ + configure(CLKernelLibrary::get().get_compile_context(), mtx_b, vector_sum_col, info); +} + +void CLGEMMLowpMatrixBReductionKernel::configure(CLCompileContext &compile_context, const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) { ARM_COMPUTE_ERROR_ON_NULLPTR(mtx_b, vector_sum_col); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_matrix_b_reduction(mtx_b->info(), vector_sum_col->info())); @@ -169,7 +179,7 @@ void CLGEMMLowpMatrixBReductionKernel::configure(const ICLTensor *mtx_b, ICLTens build_opts.add_option_if(info.mul_by_scalar, "-DSCALAR=" + support::cpp11::to_string(info.scalar)); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("gemmlowp_matrix_b_reduction", build_opts.options())); + _kernel = create_kernel(compile_context, "gemmlowp_matrix_b_reduction", build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window_matrix_b_reduction(_input->info(), _output->info()); diff --git a/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp index 806afb4e1a..045ae282d6 100644 --- a/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp @@ -78,6 +78,11 @@ CLGEMMMatrixAccumulateBiasesKernel::CLGEMMMatrixAccumulateBiasesKernel() } void CLGEMMMatrixAccumulateBiasesKernel::configure(ICLTensor *accum, const ICLTensor *biases) +{ + configure(CLKernelLibrary::get().get_compile_context(), accum, biases); +} + +void CLGEMMMatrixAccumulateBiasesKernel::configure(CLCompileContext &compile_context, ICLTensor *accum, const ICLTensor *biases) { // Perform validate step ARM_COMPUTE_ERROR_ON_NULLPTR(accum, biases); @@ -101,7 +106,7 @@ void CLGEMMMatrixAccumulateBiasesKernel::configure(ICLTensor *accum, const ICLTe build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(vector_size)); // Create kernel - _kernel = 
static_cast(CLKernelLibrary::get().create_kernel("gemm_accumulate_biases", build_opts.options())); + _kernel = create_kernel(compile_context, "gemm_accumulate_biases", build_opts.options()); } Status CLGEMMMatrixAccumulateBiasesKernel::validate(const ITensorInfo *accum, const ITensorInfo *biases, GPUTarget gpu_target) diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp index 8e764033e0..9587a042be 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp @@ -306,6 +306,12 @@ CLGEMMMatrixMultiplyKernel::CLGEMMMatrixMultiplyKernel() void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info, bool fp_mixed_precision, const ActivationLayerInfo &activation_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input0, input1, input2, output, alpha, beta, is_interleaved_transposed, reshape_info, fp_mixed_precision, activation_info); +} + +void CLGEMMMatrixMultiplyKernel::configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, + bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info, bool fp_mixed_precision, const ActivationLayerInfo &activation_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output); @@ -430,7 +436,7 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen } // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Set config_id for enabling LWS tuning _config_id = "gemm_"; diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp index d150391b00..af4b097c72 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp @@ -213,6 +213,14 @@ CLGEMMMatrixMultiplyNativeKernel::CLGEMMMatrixMultiplyNativeKernel() void CLGEMMMatrixMultiplyNativeKernel::configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input0, input1, input2, output, alpha, beta, lhs_info, rhs_info, gemm_info); +} + +void CLGEMMMatrixMultiplyNativeKernel::configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, + float beta, + const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output); @@ -281,7 +289,7 @@ void CLGEMMMatrixMultiplyNativeKernel::configure(const ICLTensor *input0, const std::string kernel_name("gemm_mm_native"); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Set config_id for enabling LWS tuning _config_id = kernel_name; diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp 
b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp index 3f154a5790..eb01486087 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp @@ -215,6 +215,14 @@ CLGEMMMatrixMultiplyReshapedKernel::CLGEMMMatrixMultiplyReshapedKernel() void CLGEMMMatrixMultiplyReshapedKernel::configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input0, input1, input2, output, alpha, beta, lhs_info, rhs_info, gemm_info); +} + +void CLGEMMMatrixMultiplyReshapedKernel::configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, + float beta, + const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output); @@ -277,7 +285,7 @@ void CLGEMMMatrixMultiplyReshapedKernel::configure(const ICLTensor *input0, cons kernel_name += rhs_info.transpose ? "rhs_t" : "rhs_nt"; // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Set config_id for enabling LWS tuning _config_id = kernel_name; diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp index ed66997a03..011e93d9b3 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp @@ -216,6 +216,14 @@ CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::CLGEMMMatrixMultiplyReshapedOnlyRHSKe void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input0, input1, input2, output, alpha, beta, lhs_info, rhs_info, gemm_info); +} + +void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, + float beta, + const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output); @@ -287,7 +295,7 @@ void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::configure(const ICLTensor *input kernel_name += rhs_info.transpose ? 
"t" : "nt"; // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Set config_id for enabling LWS tuning _config_id = kernel_name; diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp index 03889bd55f..98a1dee758 100644 --- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp @@ -82,6 +82,11 @@ BorderSize CLGEMMMatrixVectorMultiplyKernel::border_size() const } void CLGEMMMatrixVectorMultiplyKernel::configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input0, input1, output); +} + +void CLGEMMMatrixVectorMultiplyKernel::configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(), input1->info(), output->info())); @@ -100,7 +105,7 @@ void CLGEMMMatrixVectorMultiplyKernel::configure(const ICLTensor *input0, const build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(input0->info()->dimension(1))); std::string kernel_name = is_quantized ? std::string("gemm_mv_quantized") : std::string("gemm_mv"); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Add static arguments if(is_quantized) diff --git a/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp b/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp index 6f92522cc0..73e3106ff8 100644 --- a/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp +++ b/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp @@ -120,6 +120,11 @@ CLGEMMReshapeLHSMatrixKernel::CLGEMMReshapeLHSMatrixKernel() } void CLGEMMReshapeLHSMatrixKernel::configure(const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, lhs_info, reinterpret_input_as_3d); +} + +void CLGEMMReshapeLHSMatrixKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -146,7 +151,7 @@ void CLGEMMReshapeLHSMatrixKernel::configure(const ICLTensor *input, ICLTensor * kernel_name += lhs_info.transpose ? 
"t" : "nt"; // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info(), lhs_info, reinterpret_input_as_3d); diff --git a/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp index 766b3e69dd..1623b1e552 100644 --- a/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp +++ b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp @@ -101,6 +101,11 @@ CLGEMMReshapeRHSMatrixKernel::CLGEMMReshapeRHSMatrixKernel() } void CLGEMMReshapeRHSMatrixKernel::configure(const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, rhs_info); +} + +void CLGEMMReshapeRHSMatrixKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -124,7 +129,7 @@ void CLGEMMReshapeRHSMatrixKernel::configure(const ICLTensor *input, ICLTensor * kernel_name += rhs_info.transpose ? "t" : "nt"; // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info(), rhs_info); diff --git a/src/core/CL/kernels/CLGatherKernel.cpp b/src/core/CL/kernels/CLGatherKernel.cpp index ae77945852..6bee66ab93 100644 --- a/src/core/CL/kernels/CLGatherKernel.cpp +++ b/src/core/CL/kernels/CLGatherKernel.cpp @@ -88,6 +88,11 @@ CLGatherKernel::CLGatherKernel() } void CLGatherKernel::configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, indices, output, axis); +} + +void CLGatherKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, indices); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), indices->info(), output->info(), axis)); @@ -109,7 +114,7 @@ void CLGatherKernel::configure(const ICLTensor *input, const ICLTensor *indices, build_opts.add_option("-DAXIS=" + support::cpp11::to_string(_axis)); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("gather", build_opts.options())); + _kernel = create_kernel(compile_context, "gather", build_opts.options()); ICLKernel::configure_internal(win_config.second); } diff --git a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp b/src/core/CL/kernels/CLGaussian3x3Kernel.cpp index 7e8f3139f2..0edf46b506 100644 --- a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp +++ b/src/core/CL/kernels/CLGaussian3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. 
 *
 * SPDX-License-Identifier: MIT
 *
@@ -40,6 +40,11 @@ BorderSize CLGaussian3x3Kernel::border_size() const
 }

 void CLGaussian3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined);
+}
+
+void CLGaussian3x3Kernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
@@ -55,7 +60,7 @@ void CLGaussian3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, b
     };

     // Create kernel
-    _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("convolution3x3_static", build_opts));
+    _kernel = create_kernel(compile_context, "convolution3x3_static", build_opts);

     // Configure kernel window
     constexpr unsigned int num_elems_processed_per_iteration = 8;
diff --git a/src/core/CL/kernels/CLGaussian5x5Kernel.cpp b/src/core/CL/kernels/CLGaussian5x5Kernel.cpp
index 3b45b07ed9..98436b950f 100644
--- a/src/core/CL/kernels/CLGaussian5x5Kernel.cpp
+++ b/src/core/CL/kernels/CLGaussian5x5Kernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -28,18 +28,28 @@ using namespace arm_compute;

 void CLGaussian5x5HorKernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined);
+}
+
+void CLGaussian5x5HorKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined)
 {
     const std::array<int16_t, 5> matrix = { 1, 4, 6, 4, 1 };

     // Set arguments
-    CLSeparableConvolution5x5HorKernel::configure(input, output, matrix.data(), border_undefined);
+    CLSeparableConvolution5x5HorKernel::configure(compile_context, input, output, matrix.data(), border_undefined);
 }

 void CLGaussian5x5VertKernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined);
+}
+
+void CLGaussian5x5VertKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined)
 {
     const uint32_t scale = 256;
     const std::array<int16_t, 5> matrix = { 1, 4, 6, 4, 1 };

     // Set arguments
-    CLSeparableConvolution5x5VertKernel::configure(input, output, matrix.data(), scale, border_undefined);
+    CLSeparableConvolution5x5VertKernel::configure(compile_context, input, output, matrix.data(), scale, border_undefined);
 }
diff --git a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp
index fd21bb69aa..8486d45e1a 100644
--- a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp
+++ b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp
@@ -43,6 +43,11 @@ BorderSize CLGaussianPyramidHorKernel::border_size() const
 }

 void CLGaussianPyramidHorKernel::configure(const ICLTensor *input, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLGaussianPyramidHorKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U16);
@@ -58,7 +63,7 @@ void CLGaussianPyramidHorKernel::configure(const ICLTensor *input,
ICLTensor *ou // Create kernel const std::string kernel_name = std::string("gaussian1x5_sub_x"); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name)); + _kernel = create_kernel(compile_context, kernel_name); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 16; @@ -149,6 +154,11 @@ BorderSize CLGaussianPyramidVertKernel::border_size() const } void CLGaussianPyramidVertKernel::configure(const ICLTensor *input, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output); +} + +void CLGaussianPyramidVertKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U16); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); @@ -164,7 +174,7 @@ void CLGaussianPyramidVertKernel::configure(const ICLTensor *input, ICLTensor *o // Create kernel const std::string kernel_name = std::string("gaussian5x1_sub_y"); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("gaussian5x1_sub_y")); + _kernel = create_kernel(compile_context, "gaussian5x1_sub_y"); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 8; diff --git a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp index 96e2213bb9..0f09152757 100644 --- a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp +++ b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp @@ -72,6 +72,11 @@ CLComputeAllAnchorsKernel::CLComputeAllAnchorsKernel() } void CLComputeAllAnchorsKernel::configure(const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info) +{ + configure(CLKernelLibrary::get().get_compile_context(), anchors, all_anchors, info); +} + +void CLComputeAllAnchorsKernel::configure(CLCompileContext &compile_context, const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info) { ARM_COMPUTE_ERROR_ON_NULLPTR(anchors, all_anchors); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(anchors->info(), all_anchors->info(), info)); @@ -110,7 +115,7 @@ void CLComputeAllAnchorsKernel::configure(const ICLTensor *anchors, ICLTensor *a // Create kernel const std::string kernel_name = (is_quantized) ? "generate_proposals_compute_all_anchors_quantized" : "generate_proposals_compute_all_anchors"; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // The tensor all_anchors can be interpreted as an array of structs (each structs has values_per_roi fields). 
// This means we don't need to pad on the X dimension, as we know in advance how many fields diff --git a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp b/src/core/CL/kernels/CLHOGDescriptorKernel.cpp index c251dec066..f79388e93d 100644 --- a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp +++ b/src/core/CL/kernels/CLHOGDescriptorKernel.cpp @@ -47,6 +47,11 @@ CLHOGOrientationBinningKernel::CLHOGOrientationBinningKernel() } void CLHOGOrientationBinningKernel::configure(const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input_magnitude, input_phase, output, hog_info); +} + +void CLHOGOrientationBinningKernel::configure(CLCompileContext &compile_context, const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_magnitude, 1, DataType::S16); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_phase, 1, DataType::U8); @@ -75,7 +80,7 @@ void CLHOGOrientationBinningKernel::configure(const ICLTensor *input_magnitude, // Create kernel const std::string kernel_name = std::string("hog_orientation_binning"); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _kernel = create_kernel(compile_context, kernel_name, build_opts); constexpr unsigned int num_elems_processed_per_iteration = 1; constexpr unsigned int num_elems_read_per_iteration = 1; @@ -138,6 +143,11 @@ CLHOGBlockNormalizationKernel::CLHOGBlockNormalizationKernel() } void CLHOGBlockNormalizationKernel::configure(const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, hog_info); +} + +void CLHOGBlockNormalizationKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info) { ARM_COMPUTE_ERROR_ON(hog_info == nullptr); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, hog_info->num_bins(), DataType::F32); @@ -172,7 +182,7 @@ void CLHOGBlockNormalizationKernel::configure(const ICLTensor *input, ICLTensor build_opts.insert(args_str.str()); const std::string kernel_name = std::string("hog_block_normalization"); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _kernel = create_kernel(compile_context, kernel_name, build_opts); constexpr unsigned int num_elems_processed_per_iteration = 1; constexpr unsigned int num_elems_read_per_iteration = 1; diff --git a/src/core/CL/kernels/CLHOGDetectorKernel.cpp b/src/core/CL/kernels/CLHOGDetectorKernel.cpp index c003df4fd2..02fad20a05 100644 --- a/src/core/CL/kernels/CLHOGDetectorKernel.cpp +++ b/src/core/CL/kernels/CLHOGDetectorKernel.cpp @@ -44,6 +44,13 @@ CLHOGDetectorKernel::CLHOGDetectorKernel() void CLHOGDetectorKernel::configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, const Size2D &detection_window_stride, float threshold, uint16_t idx_class) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, hog, detection_windows, num_detection_windows, detection_window_stride, threshold, idx_class); +} + +void CLHOGDetectorKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, + const Size2D &detection_window_stride, + float threshold, uint16_t idx_class) { 
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::F32);
     ARM_COMPUTE_ERROR_ON(hog == nullptr);
@@ -82,7 +89,7 @@ void CLHOGDetectorKernel::configure(const ICLTensor *input, const ICLHOG *hog, I

     // Create kernel
     const std::string kernel_name = std::string("hog_detector");
-    _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+    _kernel = create_kernel(compile_context, kernel_name, build_opts);

     // Set static kernel arguments
     unsigned int idx = num_arguments_per_2D_tensor(); // Skip the input parameters
diff --git a/src/core/CL/kernels/CLHarrisCornersKernel.cpp b/src/core/CL/kernels/CLHarrisCornersKernel.cpp
index eb1ebf6c14..2c344c7160 100644
--- a/src/core/CL/kernels/CLHarrisCornersKernel.cpp
+++ b/src/core/CL/kernels/CLHarrisCornersKernel.cpp
@@ -55,6 +55,13 @@ BorderSize CLHarrisScoreKernel::border_size() const

 void CLHarrisScoreKernel::configure(const ICLImage *input1, const ICLImage *input2, ICLImage *output, int32_t block_size, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, block_size, norm_factor, strength_thresh, sensitivity, border_undefined);
+}
+
+void CLHarrisScoreKernel::configure(CLCompileContext &compile_context, const ICLImage *input1, const ICLImage *input2, ICLImage *output,
+                                    int32_t block_size, float norm_factor, float strength_thresh, float sensitivity,
+                                    bool border_undefined)
 {
     ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input1);
     ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input2);
@@ -82,7 +89,7 @@ void CLHarrisScoreKernel::configure(const ICLImage *input1, const ICLImage *inpu
     std::set<std::string> build_opts = { ("-DDATA_TYPE=" + get_cl_type_from_data_type(input1->info()->data_type())) };

     // Create kernel
-    _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(harris_score_kernel_name.str(), build_opts));
+    _kernel = create_kernel(compile_context, harris_score_kernel_name.str(), build_opts);

     // Set static kernel arguments
     unsigned int idx = 3 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp
index a92382dad3..8d9e1b9f9d 100644
--- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp
@@ -90,6 +90,11 @@ Status CLHeightConcatenateLayerKernel::validate(const ITensorInfo *input, unsign
 }

 void CLHeightConcatenateLayerKernel::configure(const ICLTensor *input, unsigned int height_offset, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, height_offset, output);
+}
+
+void CLHeightConcatenateLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int height_offset, ICLTensor *output)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), height_offset, output->info()));
@@ -119,7 +124,7 @@ void CLHeightConcatenateLayerKernel::configure(const ICLTensor *input, unsigned
     }

     // Create kernel
-    _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("concatenate_height", build_opts.options()));
+    _kernel = create_kernel(compile_context, "concatenate_height", build_opts.options());

     // Configure kernel window
     ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
diff --git a/src/core/CL/kernels/CLHistogramKernel.cpp b/src/core/CL/kernels/CLHistogramKernel.cpp
index adb998d2f5..5c44f6eec5 100644
---
a/src/core/CL/kernels/CLHistogramKernel.cpp +++ b/src/core/CL/kernels/CLHistogramKernel.cpp @@ -52,6 +52,11 @@ CLHistogramKernel::CLHistogramKernel() } void CLHistogramKernel::configure(const ICLImage *input, ICLDistribution1D *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output); +} + +void CLHistogramKernel::configure(CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output) { ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); ARM_COMPUTE_ERROR_ON(nullptr == output); @@ -84,7 +89,7 @@ void CLHistogramKernel::configure(const ICLImage *input, ICLDistribution1D *outp // Create kernel bool is_fixed_size = (256 == num_bins) && (1 == window_size) && (0 == offset) && (256 == offrange); const std::string kernel_name = is_fixed_size ? "hist_local_kernel_fixed" : "hist_local_kernel"; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name)); + _kernel = create_kernel(compile_context, kernel_name); // Set static kernel arguments unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input and output parameters @@ -157,6 +162,11 @@ CLHistogramBorderKernel::CLHistogramBorderKernel() } void CLHistogramBorderKernel::configure(const ICLImage *input, ICLDistribution1D *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output); +} + +void CLHistogramBorderKernel::configure(CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output) { ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); ARM_COMPUTE_ERROR_ON(nullptr == output); @@ -190,7 +200,7 @@ void CLHistogramBorderKernel::configure(const ICLImage *input, ICLDistribution1D // Create kernel bool is_fixed_size = (256 == num_bins) && (1 == window_size) && (0 == offset) && (256 == offrange); const std::string kernel_name = is_fixed_size ? 
"hist_border_kernel_fixed" : "hist_border_kernel"; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name)); + _kernel = create_kernel(compile_context, kernel_name); // Set static kernel arguments unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input and output parameters diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 4023c14137..b24d2509d1 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -294,6 +294,13 @@ CLIm2ColKernel::CLIm2ColKernel() void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, unsigned int num_groups) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, kernel_dims, conv_info, has_bias, dilation, num_groups); +} + +void CLIm2ColKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, + const Size2D &dilation, + unsigned int num_groups) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias, dilation, num_groups)); @@ -311,7 +318,7 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const Im2ColConfiguration im2col_config = configure_opencl_kernel(input->info(), kernel_dims, conv_info, has_bias, dilation, num_groups); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(im2col_config.kernel_name, im2col_config.build_options)); + _kernel = create_kernel(compile_context, im2col_config.kernel_name, im2col_config.build_options); _input = input; _output = output; diff --git a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp index 30684a2816..62a0485eff 100644 --- a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp @@ -76,6 +76,11 @@ CLInstanceNormalizationLayerKernel::CLInstanceNormalizationLayerKernel() } void CLInstanceNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, info); +} + +void CLInstanceNormalizationLayerKernel::configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input); @@ -100,7 +105,7 @@ void CLInstanceNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor * build_opts.add_option_if(_input->info()->data_layout() == DataLayout::NHWC, "-DNHWC"); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("instance_normalization", build_opts.options())); + _kernel = create_kernel(compile_context, "instance_normalization", build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window(_input->info(), _output->info()); diff --git a/src/core/CL/kernels/CLIntegralImageKernel.cpp b/src/core/CL/kernels/CLIntegralImageKernel.cpp index 79aa820865..415531d85c 100644 --- a/src/core/CL/kernels/CLIntegralImageKernel.cpp +++ b/src/core/CL/kernels/CLIntegralImageKernel.cpp @@ -38,6 +38,11 @@ using namespace arm_compute; void CLIntegralImageHorKernel::configure(const ICLTensor *input, ICLTensor 
*output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output); +} + +void CLIntegralImageHorKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32); @@ -47,7 +52,7 @@ void CLIntegralImageHorKernel::configure(const ICLTensor *input, ICLTensor *outp // Create kernel const std::string kernel_name = std::string("integral_horizontal"); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name)); + _kernel = create_kernel(compile_context, kernel_name); // Configure kernel window const unsigned int num_elems_processed_per_iteration = input->info()->dimension(0); @@ -84,6 +89,11 @@ CLIntegralImageVertKernel::CLIntegralImageVertKernel() } void CLIntegralImageVertKernel::configure(ICLTensor *in_out) +{ + configure(CLKernelLibrary::get().get_compile_context(), in_out); +} + +void CLIntegralImageVertKernel::configure(CLCompileContext &compile_context, ICLTensor *in_out) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(in_out, 1, DataType::U32); @@ -91,7 +101,7 @@ void CLIntegralImageVertKernel::configure(ICLTensor *in_out) // Create kernel const std::string kernel_name = std::string("integral_vertical"); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name)); + _kernel = create_kernel(compile_context, kernel_name); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration_x = 8; diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp index a0d7be043a..1817d15d3e 100644 --- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp +++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp @@ -96,6 +96,11 @@ CLL2NormalizeLayerKernel::CLL2NormalizeLayerKernel() } void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, sum, output, axis, epsilon); +} + +void CLL2NormalizeLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), sum->info(), output->info(), axis, epsilon)); @@ -131,7 +136,7 @@ void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor default: ARM_COMPUTE_ERROR("Axis not supported"); } - _kernel = static_cast(CLKernelLibrary::get().create_kernel("l2_normalize_" + kernel_name, build_opts)); + _kernel = create_kernel(compile_context, "l2_normalize_" + kernel_name, build_opts); // Set epsilon argument if(input->info()->data_type() == DataType::F32) diff --git a/src/core/CL/kernels/CLLKTrackerKernel.cpp b/src/core/CL/kernels/CLLKTrackerKernel.cpp index 68a210c115..3a7c1b5b9e 100644 --- a/src/core/CL/kernels/CLLKTrackerKernel.cpp +++ b/src/core/CL/kernels/CLLKTrackerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -41,6 +41,13 @@ using namespace arm_compute; void CLLKTrackerInitKernel::configure(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale) +{ + configure(CLKernelLibrary::get().get_compile_context(), old_points, new_points_estimates, old_points_internal, new_points_internal, use_initial_estimate, level, num_levels, pyramid_scale); +} + +void CLLKTrackerInitKernel::configure(CLCompileContext &compile_context, const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, + ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, + bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale) { ARM_COMPUTE_ERROR_ON(old_points == nullptr); @@ -55,7 +62,7 @@ void CLLKTrackerInitKernel::configure(const ICLKeyPointArray *old_points, const { kernel_name += (use_initial_estimate) ? std::string("_max_initial_estimate") : std::string("_max"); } - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name)); + _kernel = create_kernel(compile_context, kernel_name); // Set static kernel arguments unsigned int idx = 0; @@ -87,13 +94,18 @@ void CLLKTrackerInitKernel::run(const Window &window, cl::CommandQueue &queue) } void CLLKTrackerFinalizeKernel::configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points) +{ + configure(CLKernelLibrary::get().get_compile_context(), new_points_internal, new_points); +} + +void CLLKTrackerFinalizeKernel::configure(CLCompileContext &compile_context, ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points) { ARM_COMPUTE_ERROR_ON(new_points_internal == nullptr); ARM_COMPUTE_ERROR_ON(new_points == nullptr); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("finalize")); + _kernel = create_kernel(compile_context, "finalize"); // Set static kernel arguments unsigned int idx = 0; @@ -124,6 +136,14 @@ void CLLKTrackerStage0Kernel::configure(const ICLTensor *old_input, const ICLTen ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, size_t window_dimension, size_t level) +{ + configure(CLKernelLibrary::get().get_compile_context(), old_input, old_scharr_gx, old_scharr_gy, old_points_internal, new_points_internal, coeff_table, old_ival, window_dimension, level); +} + +void CLLKTrackerStage0Kernel::configure(CLCompileContext &compile_context, const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy, + ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, + ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, + size_t window_dimension, size_t level) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(old_input, 1, DataType::U8); @@ -175,7 +195,7 @@ void CLLKTrackerStage0Kernel::configure(const ICLTensor *old_input, const ICLTen }; // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("lktracker_stage0")); + _kernel = create_kernel(compile_context, "lktracker_stage0"); // Set arguments unsigned int idx = 3 * num_arguments_per_2D_tensor(); @@ -212,6 +232,13 @@ CLLKTrackerStage1Kernel::CLLKTrackerStage1Kernel() void CLLKTrackerStage1Kernel::configure(const 
ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level) +{ + configure(CLKernelLibrary::get().get_compile_context(), new_input, new_points_internal, coeff_table, old_ival, termination, epsilon, num_iterations, window_dimension, level); +} + +void CLLKTrackerStage1Kernel::configure(CLCompileContext &compile_context, const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, + ICLOldValArray *old_ival, + Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(new_input, 1, DataType::U8); @@ -257,7 +284,7 @@ void CLLKTrackerStage1Kernel::configure(const ICLTensor *new_input, ICLLKInterna const int term_epsilon = (termination == Termination::TERM_CRITERIA_EPSILON || termination == Termination::TERM_CRITERIA_BOTH) ? 1 : 0; // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("lktracker_stage1")); + _kernel = create_kernel(compile_context, "lktracker_stage1"); // Set static kernel arguments unsigned int idx = num_arguments_per_2D_tensor(); diff --git a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp index ad2f3a4892..fb750583c0 100644 --- a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -82,6 +82,11 @@ std::tuple validate_and_configure_window(ITensorInfo *input0, IT } // namespace void CLLocallyConnectedMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input0, input1, output); +} + +void CLLocallyConnectedMatrixMultiplyKernel::configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(), input1->info(), output->info())); @@ -108,7 +113,7 @@ void CLLocallyConnectedMatrixMultiplyKernel::configure(const ICLTensor *input0, // Create kernel std::string data_type_name = lower_string(string_from_data_type(input0->info()->data_type())); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(("gemm_lc_vm_" + data_type_name), build_opts)); + _kernel = create_kernel(compile_context, ("gemm_lc_vm_" + data_type_name), build_opts); // Configure kernel window auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info()); diff --git a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp b/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp index 68f793b6e2..2c28e030d2 100644 --- a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp +++ b/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp @@ -46,6 +46,12 @@ CLMagnitudePhaseKernel::CLMagnitudePhaseKernel() void CLMagnitudePhaseKernel::configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, MagnitudeType mag_type, PhaseType phase_type) +{ + configure(CLKernelLibrary::get().get_compile_context(), gx, gy, magnitude, phase, mag_type, phase_type); +} + +void 
CLMagnitudePhaseKernel::configure(CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, + MagnitudeType mag_type, PhaseType phase_type) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gx, 1, DataType::S16, DataType::S32); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gy, 1, DataType::S16, DataType::S32); @@ -118,7 +124,7 @@ void CLMagnitudePhaseKernel::configure(const ICLTensor *gx, const ICLTensor *gy, // Create kernel const std::string kernel_name = std::string("magnitude_phase"); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _kernel = create_kernel(compile_context, kernel_name, build_opts); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 16; diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp index 7bfd6d6e53..5a6630d5d4 100644 --- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp +++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -64,6 +64,11 @@ Status CLMeanStdDevKernel::validate(const ITensorInfo *input, float *mean, cl::B } void CLMeanStdDevKernel::configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev, cl::Buffer *global_sum_squared) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, mean, global_sum, stddev, global_sum_squared); +} + +void CLMeanStdDevKernel::configure(CLCompileContext &compile_context, const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev, cl::Buffer *global_sum_squared) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, mean, global_sum); ARM_COMPUTE_ERROR_ON(stddev && nullptr == global_sum_squared); @@ -83,7 +88,7 @@ void CLMeanStdDevKernel::configure(const ICLImage *input, float *mean, cl::Buffe build_opts.insert("-DSTDDEV"); } - _kernel = static_cast(CLKernelLibrary::get().create_kernel("mean_stddev_accumulate", build_opts)); + _kernel = create_kernel(compile_context, "mean_stddev_accumulate", build_opts); // Set fixed arguments unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input parameters diff --git a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp index 72935cea6d..11ef86e8c3 100644 --- a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp +++ b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp @@ -84,6 +84,11 @@ CLMeanStdDevNormalizationKernel::CLMeanStdDevNormalizationKernel() } void CLMeanStdDevNormalizationKernel::configure(ICLTensor *input, ICLTensor *output, float epsilon) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, epsilon); +} + +void CLMeanStdDevNormalizationKernel::configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, float epsilon) { ARM_COMPUTE_ERROR_ON_NULLPTR(input); @@ -105,7 +110,7 @@ void CLMeanStdDevNormalizationKernel::configure(ICLTensor *input, ICLTensor *out build_opts.add_option_if(_run_in_place, "-DIN_PLACE"); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("mean_stddev_normalization", build_opts.options())); + _kernel = create_kernel(compile_context, "mean_stddev_normalization", build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window(input->info(), (_run_in_place) ? 
nullptr : output->info()); diff --git a/src/core/CL/kernels/CLMedian3x3Kernel.cpp b/src/core/CL/kernels/CLMedian3x3Kernel.cpp index fca8d9b604..cfc9591584 100644 --- a/src/core/CL/kernels/CLMedian3x3Kernel.cpp +++ b/src/core/CL/kernels/CLMedian3x3Kernel.cpp @@ -38,6 +38,11 @@ BorderSize CLMedian3x3Kernel::border_size() const } void CLMedian3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined); +} + +void CLMedian3x3Kernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); @@ -47,7 +52,7 @@ void CLMedian3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, boo // Create kernel const std::string kernel_name = std::string("non_linear_filter_box3x3"); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, { "-DMEDIAN" })); + _kernel = create_kernel(compile_context, kernel_name, { "-DMEDIAN" }); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 8; diff --git a/src/core/CL/kernels/CLMemsetKernel.cpp b/src/core/CL/kernels/CLMemsetKernel.cpp index b06ae7e118..9b37cb81fd 100644 --- a/src/core/CL/kernels/CLMemsetKernel.cpp +++ b/src/core/CL/kernels/CLMemsetKernel.cpp @@ -43,6 +43,13 @@ CLMemsetKernel::CLMemsetKernel() void CLMemsetKernel::configure(ICLTensor *tensor, const PixelValue &constant_value, Window *window) +{ + configure(CLKernelLibrary::get().get_compile_context(), tensor, constant_value, window); +} + +void CLMemsetKernel::configure(CLCompileContext &compile_context, ICLTensor *tensor, + const PixelValue &constant_value, + Window *window) { ARM_COMPUTE_ERROR_ON_NULLPTR(tensor); ARM_COMPUTE_ERROR_THROW_ON(validate(tensor->info(), constant_value, window)); @@ -77,7 +84,7 @@ void CLMemsetKernel::configure(ICLTensor *tensor, build_opts.add_option("-DCONSTANT_VALUE=" + string_from_pixel_value(constant_value, data_type)); build_opts.add_option_if(multi_access_x, "-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x)); build_opts.add_option_if(multi_access_x && remainder_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max(output_width_x - vec_size_x, 0))); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("memset", build_opts.options())); + _kernel = create_kernel(compile_context, "memset", build_opts.options()); } Status CLMemsetKernel::validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *window) diff --git a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp index 622270acf5..c89bbcb320 100644 --- a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp @@ -87,6 +87,11 @@ CLMinMaxLayerKernel::CLMinMaxLayerKernel() } void CLMinMaxLayerKernel::configure(const ICLTensor *input, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output); +} + +void CLMinMaxLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); @@ -100,7 +105,7 @@ void CLMinMaxLayerKernel::configure(const ICLTensor *input, ICLTensor *output) build_opts.emplace("-DDEPTH=" + 
support::cpp11::to_string(input->info()->dimension(2))); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("minmax_layer", build_opts)); + _kernel = create_kernel(compile_context, "minmax_layer", build_opts); auto win_config = validate_and_configure_window(input->info(), output->info()); diff --git a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp index 3cead37cd8..77c945bed1 100644 --- a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp +++ b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp @@ -61,6 +61,11 @@ CLMinMaxKernel::CLMinMaxKernel() } void CLMinMaxKernel::configure(const ICLImage *input, cl::Buffer *min_max) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, min_max); +} + +void CLMinMaxKernel::configure(CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F32); ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); @@ -109,7 +114,7 @@ void CLMinMaxKernel::configure(const ICLImage *input, cl::Buffer *min_max) } // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("minmax", build_opts)); + _kernel = create_kernel(compile_context, "minmax", build_opts); // Set fixed arguments unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input and output parameters @@ -168,6 +173,12 @@ CLMinMaxLocationKernel::CLMinMaxLocationKernel() } void CLMinMaxLocationKernel::configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, ICLCoordinates2DArray *min_loc, ICLCoordinates2DArray *max_loc) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, min_max, min_max_count, min_loc, max_loc); +} + +void CLMinMaxLocationKernel::configure(CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, ICLCoordinates2DArray *min_loc, + ICLCoordinates2DArray *max_loc) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F32); ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); @@ -189,7 +200,7 @@ void CLMinMaxLocationKernel::configure(const ICLImage *input, cl::Buffer *min_ma } // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("minmaxloc", build_opts)); + _kernel = create_kernel(compile_context, "minmaxloc", build_opts); // Set static arguments unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input and output parameters diff --git a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp b/src/core/CL/kernels/CLNonLinearFilterKernel.cpp index 5e419743d0..01b8733ab8 100644 --- a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp +++ b/src/core/CL/kernels/CLNonLinearFilterKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -56,6 +56,13 @@ BorderSize CLNonLinearFilterKernel::border_size() const void CLNonLinearFilterKernel::configure(const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, bool border_undefined) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, function, mask_size, pattern, mask, border_undefined); +} + +void CLNonLinearFilterKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, + unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, + bool border_undefined) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); @@ -78,7 +85,7 @@ void CLNonLinearFilterKernel::configure(const ICLTensor *input, ICLTensor *outpu ss << "non_linear_filter_" << pattern_name << mask_size << "x" << mask_size; // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(ss.str(), build_opts)); + _kernel = create_kernel(compile_context, ss.str(), build_opts); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 8; diff --git a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp index 4e41f0df42..dd6aa1ea8f 100644 --- a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp +++ b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -42,6 +42,11 @@ BorderSize CLNonMaximaSuppression3x3Kernel::border_size() const } void CLNonMaximaSuppression3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined); +} + +void CLNonMaximaSuppression3x3Kernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::F32); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::F32); @@ -51,7 +56,7 @@ void CLNonMaximaSuppression3x3Kernel::configure(const ICLTensor *input, ICLTenso // Create kernel std::set build_opts = { ("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())) }; - _kernel = static_cast(CLKernelLibrary::get().create_kernel("non_max_suppression", build_opts)); + _kernel = create_kernel(compile_context, "non_max_suppression", build_opts); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 8; diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp index 024d1de9b0..6284a6acb4 100644 --- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp @@ -106,6 +106,11 @@ BorderSize CLNormalizationLayerKernel::border_size() const } void CLNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, norm_info); +} + +void CLNormalizationLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info) { 
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -156,7 +161,7 @@ void CLNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *ou kernel_name = "normalization_layer_in_map_nchw"; } } - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info(), norm_info); diff --git a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp index c713e1fffa..d46581e4dc 100644 --- a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp +++ b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp @@ -96,6 +96,11 @@ CLNormalizePlanarYUVLayerKernel::CLNormalizePlanarYUVLayerKernel() } void CLNormalizePlanarYUVLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, mean, std); +} + +void CLNormalizePlanarYUVLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std) { // Perform validation step ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, mean, std); @@ -127,7 +132,7 @@ void CLNormalizePlanarYUVLayerKernel::configure(const ICLTensor *input, ICLTenso // Create kernel kernel_name += lower_string(string_from_data_layout(input->info()->data_layout())); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info(), mean->info(), std->info()); diff --git a/src/core/CL/kernels/CLPadLayerKernel.cpp b/src/core/CL/kernels/CLPadLayerKernel.cpp index 3eeef5583c..764e2a41e7 100644 --- a/src/core/CL/kernels/CLPadLayerKernel.cpp +++ b/src/core/CL/kernels/CLPadLayerKernel.cpp @@ -97,6 +97,11 @@ CLPadLayerKernel::CLPadLayerKernel() } void CLPadLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, padding, constant_value, mode); +} + +void CLPadLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode) { // Perform validation step ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -189,7 +194,7 @@ void CLPadLayerKernel::configure(const ICLTensor *input, ICLTensor *output, cons } // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); } Status CLPadLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode) diff --git a/src/core/CL/kernels/CLPermuteKernel.cpp b/src/core/CL/kernels/CLPermuteKernel.cpp index 9a4d72db88..3f1f870802 100644 --- a/src/core/CL/kernels/CLPermuteKernel.cpp +++ b/src/core/CL/kernels/CLPermuteKernel.cpp @@ -76,6 +76,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c } // namespace void CLPermuteKernel::configure(const ICLTensor *input, ICLTensor *output, const 
PermutationVector &perm) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, perm); +} + +void CLPermuteKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PermutationVector &perm) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), perm)); @@ -98,7 +103,7 @@ void CLPermuteKernel::configure(const ICLTensor *input, ICLTensor *output, const build_opts.add_option("-DP3=" + support::cpp11::to_string((_perm.num_dimensions() >= 3) ? perm[2] : 2)); build_opts.add_option("-DP4=" + support::cpp11::to_string((_perm.num_dimensions() >= 4) ? perm[3] : 3)); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("permute", build_opts.options())); + _kernel = create_kernel(compile_context, "permute", build_opts.options()); // Configure kernel window Window win = calculate_max_window(*input->info(), Steps()); diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp index 2df3ff4f34..49f5e04433 100644 --- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp +++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp @@ -144,6 +144,12 @@ CLPixelWiseMultiplicationKernel::CLPixelWiseMultiplicationKernel() void CLPixelWiseMultiplicationKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, scale, overflow_policy, rounding_policy, act_info); +} + +void CLPixelWiseMultiplicationKernel::configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale, + ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), output->info(), @@ -233,7 +239,7 @@ void CLPixelWiseMultiplicationKernel::configure(const ICLTensor *input1, const I } // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Set scale argument unsigned int idx = 3 * num_arguments_per_3D_tensor(); // Skip the inputs and output parameters @@ -369,6 +375,11 @@ CLComplexPixelWiseMultiplicationKernel::CLComplexPixelWiseMultiplicationKernel() } void CLComplexPixelWiseMultiplicationKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, act_info); +} + +void CLComplexPixelWiseMultiplicationKernel::configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_complex(input1->info(), input2->info(), output->info(), act_info)); @@ -390,7 +401,7 @@ void CLComplexPixelWiseMultiplicationKernel::configure(const ICLTensor *input1, } // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("pixelwise_mul_complex", build_opts.options())); + _kernel = create_kernel(compile_context, 
"pixelwise_mul_complex", build_opts.options()); ICLKernel::configure_internal(win_config.second); } diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp index dbbca4771b..43b8f85c39 100644 --- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp @@ -177,6 +177,11 @@ BorderSize CLPoolingLayerKernel::border_size() const } void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, pool_info, indices); +} + +void CLPoolingLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -275,12 +280,12 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output, std::string kernel_name = ((is_pool3x3_stride_le3) ? "pooling_layer_optimized_" : "pooling_layer_") + support::cpp11::to_string(pool_size_x); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); } else // Run general case { std::string kernel_name = is_data_type_quantized_asymmetric(data_type) ? "pooling_layer_MxN_quantized_nchw" : "pooling_layer_MxN_nchw"; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); } break; } @@ -292,7 +297,7 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output, build_opts.add_option_if(output->info()->tensor_shape().total_size_upper(3) > 1, "-DDST_DEPTH=" + support::cpp11::to_string(output->info()->dimension(idx_height))); std::string kernel_name = is_data_type_quantized_asymmetric(data_type) ? 
"pooling_layer_MxN_quantized_nhwc" : "pooling_layer_MxN_nhwc"; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); break; } default: diff --git a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp index ed70f4ddb7..9f930c54c2 100644 --- a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp +++ b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp @@ -101,6 +101,12 @@ CLPriorBoxLayerKernel::CLPriorBoxLayerKernel() } void CLPriorBoxLayerKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max, cl::Buffer *aspect_ratios) +{ + configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, info, min, max, aspect_ratios); +} + +void CLPriorBoxLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, + cl::Buffer *max, cl::Buffer *aspect_ratios) { ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); @@ -170,7 +176,7 @@ void CLPriorBoxLayerKernel::configure(const ICLTensor *input1, const ICLTensor * } unsigned int idx = num_arguments_per_2D_tensor(); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("prior_box_layer_nchw", build_opts.options())); + _kernel = create_kernel(compile_context, "prior_box_layer_nchw", build_opts.options()); _kernel.setArg(idx++, *_min); _kernel.setArg(idx++, *_max); diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp index 2ec2bd1178..e017946673 100644 --- a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp @@ -80,6 +80,11 @@ CLQuantizationLayerKernel::CLQuantizationLayerKernel() } void CLQuantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output); +} + +void CLQuantizationLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); @@ -154,7 +159,7 @@ void CLQuantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *out build_opts.add_option("-DMIN_QUANT_VAL=" + support::cpp11::to_string(min_max_quant_values.first)); build_opts.add_option("-DMAX_QUANT_VAL=" + support::cpp11::to_string(min_max_quant_values.second)); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("quantization_layer", build_opts.options())); + _kernel = create_kernel(compile_context, "quantization_layer", build_opts.options()); } Status CLQuantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output) diff --git a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp index 41ed8ede7f..cc1af52342 100644 --- a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp +++ b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp @@ -104,6 +104,11 @@ CLROIAlignLayerKernel::CLROIAlignLayerKernel() } void CLROIAlignLayerKernel::configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, rois, output, pool_info); +} + +void 
CLROIAlignLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, rois); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), rois->info(), output->info(), pool_info)); @@ -149,7 +154,7 @@ void CLROIAlignLayerKernel::configure(const ICLTensor *input, const ICLTensor *r // Create kernel const std::string kernel_name = (is_qasymm) ? "roi_align_layer_quantized" : "roi_align_layer"; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); ICLKernel::configure_internal(win_config.second); } diff --git a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp index dad33b1bf8..5f64215485 100644 --- a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp @@ -72,6 +72,11 @@ CLROIPoolingLayerKernel::CLROIPoolingLayerKernel() } void CLROIPoolingLayerKernel::configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, rois, output, pool_info); +} + +void CLROIPoolingLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, rois, output); @@ -115,7 +120,7 @@ void CLROIPoolingLayerKernel::configure(const ICLTensor *input, const ICLTensor // Create kernel std::string kernel_name = "roi_pooling_layer"; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _kernel = create_kernel(compile_context, kernel_name, build_opts); // Set static kernel arguments unsigned int idx = 2 * num_arguments_per_3D_tensor() + num_arguments_per_1D_array(); diff --git a/src/core/CL/kernels/CLRangeKernel.cpp b/src/core/CL/kernels/CLRangeKernel.cpp index 4024de7f2b..97b5f01df4 100644 --- a/src/core/CL/kernels/CLRangeKernel.cpp +++ b/src/core/CL/kernels/CLRangeKernel.cpp @@ -92,6 +92,11 @@ CLRangeKernel::CLRangeKernel() } void CLRangeKernel::configure(ICLTensor *output, const float start, const float end, const float step) +{ + configure(CLKernelLibrary::get().get_compile_context(), output, start, end, step); +} + +void CLRangeKernel::configure(CLCompileContext &compile_context, ICLTensor *output, const float start, const float end, const float step) { ARM_COMPUTE_ERROR_ON_NULLPTR(output); @@ -123,7 +128,7 @@ void CLRangeKernel::configure(ICLTensor *output, const float start, const float kernel_name += "_quantized"; } // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); ICLKernel::configure_internal(win_config.second); // Set config_id for enabling LWS tuning diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp index 7563f02ff3..5c760168ca 100644 --- a/src/core/CL/kernels/CLReductionOperationKernel.cpp +++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp @@ -128,6 +128,11 @@ BorderSize CLReductionOperationKernel::border_size() const } void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned 
int width) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, axis, op, width); +} + +void CLReductionOperationKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -227,7 +232,7 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou default: ARM_COMPUTE_ERROR("Not supported"); } - _kernel = static_cast(CLKernelLibrary::get().create_kernel("reduction_operation_" + kernel_axis_name, build_opts.options())); + _kernel = create_kernel(compile_context, "reduction_operation_" + kernel_axis_name, build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window(_input->info(), _output->info(), axis, op); diff --git a/src/core/CL/kernels/CLRemapKernel.cpp b/src/core/CL/kernels/CLRemapKernel.cpp index 12161fcd70..fb425b512f 100644 --- a/src/core/CL/kernels/CLRemapKernel.cpp +++ b/src/core/CL/kernels/CLRemapKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -48,6 +48,12 @@ BorderSize CLRemapKernel::border_size() const } void CLRemapKernel::configure(const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, map_x, map_y, output, policy, border_undefined); +} + +void CLRemapKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, + bool border_undefined) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); @@ -66,7 +72,7 @@ void CLRemapKernel::configure(const ICLTensor *input, const ICLTensor *map_x, co std::string interpolation_name = string_from_interpolation_policy(policy); std::transform(interpolation_name.begin(), interpolation_name.end(), interpolation_name.begin(), ::tolower); std::string kernel_name = "remap_" + interpolation_name; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _kernel = create_kernel(compile_context, kernel_name, build_opts); // Configure window constexpr unsigned int num_elems_processed_per_iteration = 4; diff --git a/src/core/CL/kernels/CLReorgLayerKernel.cpp b/src/core/CL/kernels/CLReorgLayerKernel.cpp index 4aa50c1889..e36bcbbe34 100644 --- a/src/core/CL/kernels/CLReorgLayerKernel.cpp +++ b/src/core/CL/kernels/CLReorgLayerKernel.cpp @@ -71,6 +71,11 @@ CLReorgLayerKernel::CLReorgLayerKernel() } void CLReorgLayerKernel::configure(const ICLTensor *input, ICLTensor *output, int32_t stride) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, stride); +} + +void CLReorgLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t stride) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), stride)); @@ -86,7 +91,7 @@ void CLReorgLayerKernel::configure(const ICLTensor *input, ICLTensor *output, in build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); build_opts.add_option("-DSRC_DEPTH=" + support::cpp11::to_string(input->info()->dimension(idx_channel))); 
build_opts.add_option("-DSTRIDE=" + support::cpp11::to_string(stride)); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Configure window // auto inizialize the output tensor if not yet initialized diff --git a/src/core/CL/kernels/CLReshapeLayerKernel.cpp b/src/core/CL/kernels/CLReshapeLayerKernel.cpp index a6053d97e3..33a1ceacc4 100644 --- a/src/core/CL/kernels/CLReshapeLayerKernel.cpp +++ b/src/core/CL/kernels/CLReshapeLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -63,6 +63,11 @@ CLReshapeLayerKernel::CLReshapeLayerKernel() } void CLReshapeLayerKernel::configure(const ICLTensor *input, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output); +} + +void CLReshapeLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); @@ -72,7 +77,7 @@ void CLReshapeLayerKernel::configure(const ICLTensor *input, ICLTensor *output) // Create kernel std::set build_opts = { "-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->info()->element_size()) }; - _kernel = static_cast(CLKernelLibrary::get().create_kernel("reshape_layer", build_opts)); + _kernel = create_kernel(compile_context, "reshape_layer", build_opts); // Add static arguments const cl_int2 input_shape = diff --git a/src/core/CL/kernels/CLReverseKernel.cpp b/src/core/CL/kernels/CLReverseKernel.cpp index 83708e268c..d88a78c029 100644 --- a/src/core/CL/kernels/CLReverseKernel.cpp +++ b/src/core/CL/kernels/CLReverseKernel.cpp @@ -64,6 +64,11 @@ CLReverseKernel::CLReverseKernel() } void CLReverseKernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *axis) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, axis); +} + +void CLReverseKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *axis) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, axis); @@ -82,7 +87,7 @@ void CLReverseKernel::configure(const ICLTensor *input, ICLTensor *output, const build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->info()->element_size())); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("reverse", build_opts.options())); + _kernel = create_kernel(compile_context, "reverse", build_opts.options()); // Set static kernel arguments unsigned int idx = 2 * num_arguments_per_4D_tensor() + num_arguments_per_1D_tensor(); diff --git a/src/core/CL/kernels/CLScaleKernel.cpp b/src/core/CL/kernels/CLScaleKernel.cpp index 244afb5e4c..33c7ad71c1 100644 --- a/src/core/CL/kernels/CLScaleKernel.cpp +++ b/src/core/CL/kernels/CLScaleKernel.cpp @@ -181,6 +181,12 @@ const ICLTensor *CLScaleKernel::output() const } void CLScaleKernel::configure(const ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy, bool align_corners) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, policy, border_mode, sampling_policy, align_corners); +} + +void CLScaleKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, 
SamplingPolicy sampling_policy, + bool align_corners) { _align_corners = policy == InterpolationPolicy::BILINEAR && sampling_policy == SamplingPolicy::TOP_LEFT @@ -236,7 +242,7 @@ void CLScaleKernel::configure(const ICLTensor *input, ICLTensor *output, Interpo std::string kernel_name = "scale_" + interpolation_name; kernel_name += call_quantized_kernel ? "_quantized_" : "_"; kernel_name += lower_string(string_from_data_layout(_data_layout)); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); unsigned int idx = is_nhwc ? 2 * num_arguments_per_4D_tensor() : 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters diff --git a/src/core/CL/kernels/CLScharr3x3Kernel.cpp b/src/core/CL/kernels/CLScharr3x3Kernel.cpp index 94b0d38c52..9f5bdb3cd8 100644 --- a/src/core/CL/kernels/CLScharr3x3Kernel.cpp +++ b/src/core/CL/kernels/CLScharr3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -48,6 +48,11 @@ BorderSize CLScharr3x3Kernel::border_size() const } void CLScharr3x3Kernel::configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_undefined); +} + +void CLScharr3x3Kernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); @@ -83,7 +88,7 @@ void CLScharr3x3Kernel::configure(const ICLTensor *input, ICLTensor *output_x, I } // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("scharr3x3", build_opts)); + _kernel = create_kernel(compile_context, "scharr3x3", build_opts); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 8; diff --git a/src/core/CL/kernels/CLSelectKernel.cpp b/src/core/CL/kernels/CLSelectKernel.cpp index 5ee5405465..866ec6bde2 100644 --- a/src/core/CL/kernels/CLSelectKernel.cpp +++ b/src/core/CL/kernels/CLSelectKernel.cpp @@ -103,6 +103,11 @@ CLSelectKernel::CLSelectKernel() { } void CLSelectKernel::configure(const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), c, x, y, output); +} + +void CLSelectKernel::configure(CLCompileContext &compile_context, const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(c, x, y, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(c->info(), x->info(), y->info(), output->info())); @@ -141,7 +146,7 @@ void CLSelectKernel::configure(const ICLTensor *c, const ICLTensor *x, const ICL kernel_name += "_different_rank"; kernel_name += is_input_rank_greater_than_two ? 
"_n" : "_2"; } - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window(c->info(), x->info(), y->info(), output->info()); diff --git a/src/core/CL/kernels/CLSobel3x3Kernel.cpp b/src/core/CL/kernels/CLSobel3x3Kernel.cpp index 1186feb1b9..1c97c13d96 100644 --- a/src/core/CL/kernels/CLSobel3x3Kernel.cpp +++ b/src/core/CL/kernels/CLSobel3x3Kernel.cpp @@ -49,6 +49,11 @@ BorderSize CLSobel3x3Kernel::border_size() const } void CLSobel3x3Kernel::configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_undefined); +} + +void CLSobel3x3Kernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); @@ -85,7 +90,7 @@ void CLSobel3x3Kernel::configure(const ICLTensor *input, ICLTensor *output_x, IC // Create kernel const std::string kernel_name = std::string("sobel3x3"); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _kernel = create_kernel(compile_context, kernel_name, build_opts); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 8; diff --git a/src/core/CL/kernels/CLSobel5x5Kernel.cpp b/src/core/CL/kernels/CLSobel5x5Kernel.cpp index cafdd9807a..597807796e 100644 --- a/src/core/CL/kernels/CLSobel5x5Kernel.cpp +++ b/src/core/CL/kernels/CLSobel5x5Kernel.cpp @@ -49,6 +49,11 @@ BorderSize CLSobel5x5HorKernel::border_size() const } void CLSobel5x5HorKernel::configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_undefined); +} + +void CLSobel5x5HorKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); @@ -86,7 +91,7 @@ void CLSobel5x5HorKernel::configure(const ICLTensor *input, ICLTensor *output_x, // Create kernel const std::string kernel_name = std::string("sobel_separable1x5"); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _kernel = create_kernel(compile_context, kernel_name, build_opts); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 8; @@ -147,6 +152,11 @@ BorderSize CLSobel5x5VertKernel::border_size() const } void CLSobel5x5VertKernel::configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) +{ + configure(CLKernelLibrary::get().get_compile_context(), input_x, input_y, output_x, output_y, border_undefined); +} + +void CLSobel5x5VertKernel::configure(CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) { ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); @@ -185,7 +195,7 @@ void CLSobel5x5VertKernel::configure(const ICLTensor *input_x, const ICLTensor * 
// Create kernel const std::string kernel_name = std::string("sobel_separable5x1"); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _kernel = create_kernel(compile_context, kernel_name, build_opts); const ICLTensor *input = _run_sobel_x ? _input_x : _input_y; diff --git a/src/core/CL/kernels/CLSobel7x7Kernel.cpp b/src/core/CL/kernels/CLSobel7x7Kernel.cpp index 0c5bb58cb9..183ebce3ac 100644 --- a/src/core/CL/kernels/CLSobel7x7Kernel.cpp +++ b/src/core/CL/kernels/CLSobel7x7Kernel.cpp @@ -49,6 +49,11 @@ BorderSize CLSobel7x7HorKernel::border_size() const } void CLSobel7x7HorKernel::configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_undefined); +} + +void CLSobel7x7HorKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); @@ -88,7 +93,7 @@ void CLSobel7x7HorKernel::configure(const ICLTensor *input, ICLTensor *output_x, } // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _kernel = create_kernel(compile_context, kernel_name, build_opts); // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 8; @@ -149,6 +154,11 @@ BorderSize CLSobel7x7VertKernel::border_size() const } void CLSobel7x7VertKernel::configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) +{ + configure(CLKernelLibrary::get().get_compile_context(), input_x, input_y, output_x, output_y, border_undefined); +} + +void CLSobel7x7VertKernel::configure(CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) { ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); @@ -187,7 +197,7 @@ void CLSobel7x7VertKernel::configure(const ICLTensor *input_x, const ICLTensor * // Create kernel const std::string kernel_name = std::string("sobel_separable7x1"); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _kernel = create_kernel(compile_context, kernel_name, build_opts); const ICLTensor *input = _run_sobel_x ? _input_x : _input_y; diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp index d7a1778e26..112d864827 100644 --- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp @@ -219,6 +219,11 @@ CLLogits1DMaxShiftExpSumKernel::CLLogits1DMaxShiftExpSumKernel() } void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, max, output, sum, info); +} + +void CLLogits1DMaxShiftExpSumKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, max, sum, output); @@ -277,7 +282,7 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor } // Create kernel. 
- _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Set static arguments. Both the kernels use the same arguments unsigned int idx = 4 * num_arguments_per_3D_tensor(); //Skip the input and output parameters @@ -342,6 +347,11 @@ CLLogits1DNormKernel::CLLogits1DNormKernel() } void CLLogits1DNormKernel::configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, sum, output, info); +} + +void CLLogits1DNormKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output); @@ -376,7 +386,7 @@ void CLLogits1DNormKernel::configure(const ICLTensor *input, const ICLTensor *su // Create kernel std::string kernel_name = is_quantized_asymmetric ? "softmax_layer_norm_quantized" : "softmax_layer_norm"; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Configure window auto win_config = validate_and_configure_window_1DNorm(input->info(), output->info(), sum->info(), info); diff --git a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp index 1b02eef040..520924e764 100644 --- a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp +++ b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp @@ -90,6 +90,11 @@ CLSpaceToBatchLayerKernel::CLSpaceToBatchLayerKernel() } void CLSpaceToBatchLayerKernel::configure(const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, block_shape, paddings, output); +} + +void CLSpaceToBatchLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), block_shape->info(), paddings->info(), output->info())); @@ -113,7 +118,7 @@ void CLSpaceToBatchLayerKernel::configure(const ICLTensor *input, const ICLTenso build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_width))); build_opts.add_option("-DHEIGHT_IN=" + support::cpp11::to_string(input->info()->dimension(idx_height))); build_opts.add_option("-DBATCH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_batch))); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("space_to_batch_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options())); + _kernel = create_kernel(compile_context, "space_to_batch_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()); // Configure kernel window Window win = calculate_max_window(*output->info(), Steps()); @@ -122,6 +127,13 @@ void CLSpaceToBatchLayerKernel::configure(const ICLTensor *input, const ICLTenso void CLSpaceToBatchLayerKernel::configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, block_shape_x, block_shape_y, padding_left, 
padding_right, output); +} + +void CLSpaceToBatchLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, + const Size2D &padding_right, + ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -153,7 +165,7 @@ void CLSpaceToBatchLayerKernel::configure(const ICLTensor *input, const int bloc build_opts.add_option("-DPAD_RIGHT_X=" + support::cpp11::to_string(padding_right.x())); build_opts.add_option("-DPAD_LEFT_Y=" + support::cpp11::to_string(padding_left.y())); build_opts.add_option("-DPAD_RIGHT_Y=" + support::cpp11::to_string(padding_right.y())); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("space_to_batch_static_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options())); + _kernel = create_kernel(compile_context, "space_to_batch_static_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()); // Configure kernel window Window win = calculate_max_window(*output->info(), Steps()); diff --git a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp index a7e3777bab..b4bd3b8fbe 100644 --- a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp +++ b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp @@ -67,6 +67,11 @@ CLSpaceToDepthLayerKernel::CLSpaceToDepthLayerKernel() } void CLSpaceToDepthLayerKernel::configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, block_shape); +} + +void CLSpaceToDepthLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -88,7 +93,7 @@ void CLSpaceToDepthLayerKernel::configure(const ICLTensor *input, ICLTensor *out build_opts.add_option("-DCHANNEL_SIZE=" + support::cpp11::to_string(output->info()->dimension(idx_channel))); build_opts.add_option("-DBLOCK_SHAPE=" + support::cpp11::to_string(block_shape)); build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(output->info()->dimension(idx_width))); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("space_to_depth_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options())); + _kernel = create_kernel(compile_context, "space_to_depth_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()); // Configure kernel window Window win = calculate_max_window(*output->info(), Steps()); diff --git a/src/core/CL/kernels/CLStackLayerKernel.cpp b/src/core/CL/kernels/CLStackLayerKernel.cpp index 2da35d4cdc..bc8645b381 100644 --- a/src/core/CL/kernels/CLStackLayerKernel.cpp +++ b/src/core/CL/kernels/CLStackLayerKernel.cpp @@ -81,6 +81,11 @@ CLStackLayerKernel::CLStackLayerKernel() } void CLStackLayerKernel::configure(const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, axis, idx_input, num_tensors, output); +} + +void CLStackLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), axis, idx_input, num_tensors, output->info())); @@ -99,7 +104,7 
@@ void CLStackLayerKernel::configure(const ICLTensor *input, unsigned int axis, un build_opts.add_option("-DDST_DIM3=" + support::cpp11::to_string(output->info()->dimension(3))); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("stack_layer", build_opts.options())); + _kernel = create_kernel(compile_context, "stack_layer", build_opts.options()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); ICLKernel::configure_internal(win_config.second); diff --git a/src/core/CL/kernels/CLStridedSliceKernel.cpp b/src/core/CL/kernels/CLStridedSliceKernel.cpp index 63478f1002..99c0b0b312 100644 --- a/src/core/CL/kernels/CLStridedSliceKernel.cpp +++ b/src/core/CL/kernels/CLStridedSliceKernel.cpp @@ -101,6 +101,13 @@ CLStridedSliceKernel::CLStridedSliceKernel() void CLStridedSliceKernel::configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); +} + +void CLStridedSliceKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, + const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, + int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), starts, ends, strides, begin_mask, end_mask, shrink_axis_mask)); @@ -157,7 +164,7 @@ void CLStridedSliceKernel::configure(const ICLTensor *input, ICLTensor *output, "-DDST_DEPTH=1"); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("strided_slice", build_opts.options())); + _kernel = create_kernel(compile_context, "strided_slice", build_opts.options()); // Set config_id for enabling LWS tuning _config_id = "strided_slice"; diff --git a/src/core/CL/kernels/CLTableLookupKernel.cpp b/src/core/CL/kernels/CLTableLookupKernel.cpp index bbdaa37410..f6c6ffbae8 100644 --- a/src/core/CL/kernels/CLTableLookupKernel.cpp +++ b/src/core/CL/kernels/CLTableLookupKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -37,6 +37,11 @@ using namespace arm_compute; void CLTableLookupKernel::configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, lut, output); +} + +void CLTableLookupKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLLut *lut, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16); @@ -46,7 +51,7 @@ void CLTableLookupKernel::configure(const ICLTensor *input, const ICLLut *lut, I // Create kernel std::string kernel_name = (DataType::S16 == lut->type()) ? 
"tablelookup_S16" : "tablelookup_U8"; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name)); + _kernel = create_kernel(compile_context, kernel_name); // Set lut argument unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters diff --git a/src/core/CL/kernels/CLThresholdKernel.cpp b/src/core/CL/kernels/CLThresholdKernel.cpp index 6e07cefc77..3a94faca4b 100644 --- a/src/core/CL/kernels/CLThresholdKernel.cpp +++ b/src/core/CL/kernels/CLThresholdKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -36,6 +36,12 @@ using namespace arm_compute; void CLThresholdKernel::configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, threshold, false_value, true_value, type, upper); +} + +void CLThresholdKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, uint8_t threshold, + uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); @@ -57,7 +63,7 @@ void CLThresholdKernel::configure(const ICLTensor *input, ICLTensor *output, uin } // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name)); + _kernel = create_kernel(compile_context, kernel_name); // Set arguments unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters diff --git a/src/core/CL/kernels/CLTileKernel.cpp b/src/core/CL/kernels/CLTileKernel.cpp index b205e3a8af..5db69d32e1 100644 --- a/src/core/CL/kernels/CLTileKernel.cpp +++ b/src/core/CL/kernels/CLTileKernel.cpp @@ -68,6 +68,11 @@ CLTileKernel::CLTileKernel() } void CLTileKernel::configure(const ICLTensor *input, ICLTensor *output, const Multiples &multiples) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, multiples); +} + +void CLTileKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Multiples &multiples) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -97,7 +102,7 @@ void CLTileKernel::configure(const ICLTensor *input, ICLTensor *output, const Mu build_opts.add_option("-DDST_DEPTH=" + support::cpp11::to_string(output->info()->dimension(2))); build_opts.add_option_if(multi_access_x, "-DOFFSET=" + support::cpp11::to_string(offset)); build_opts.add_option_if(multi_access_x, "-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x)); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("tile", build_opts.options())); + _kernel = create_kernel(compile_context, "tile", build_opts.options()); // Configure window without padding Window win = calculate_max_window(*output->info()); diff --git a/src/core/CL/kernels/CLTransposeKernel.cpp b/src/core/CL/kernels/CLTransposeKernel.cpp index eb348fff74..37f07e65a4 100644 --- a/src/core/CL/kernels/CLTransposeKernel.cpp +++ b/src/core/CL/kernels/CLTransposeKernel.cpp @@ -107,6 +107,11 @@ Status CLTransposeKernel::validate(const ITensorInfo *input, const ITensorInfo * } void CLTransposeKernel::configure(const ICLTensor *input, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output); +} + +void CLTransposeKernel::configure(CLCompileContext 
&compile_context, const ICLTensor *input, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -123,7 +128,7 @@ void CLTransposeKernel::configure(const ICLTensor *input, ICLTensor *output) data_type_in_bytes << input->info()->element_size(); build_opts.emplace("-DDATA_TYPE_IN_BYTES=" + data_type_in_bytes.str()); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("transpose", build_opts)); + _kernel = create_kernel(compile_context, "transpose", build_opts); // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info()); diff --git a/src/core/CL/kernels/CLUpsampleLayerKernel.cpp b/src/core/CL/kernels/CLUpsampleLayerKernel.cpp index a061236d37..8df6d5dec4 100644 --- a/src/core/CL/kernels/CLUpsampleLayerKernel.cpp +++ b/src/core/CL/kernels/CLUpsampleLayerKernel.cpp @@ -65,6 +65,11 @@ Status CLUpsampleLayerKernel::validate(const ITensorInfo *input, const ITensorIn } void CLUpsampleLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, info, upsampling_policy); +} + +void CLUpsampleLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_UNUSED(upsampling_policy); @@ -126,7 +131,7 @@ void CLUpsampleLayerKernel::configure(const ICLTensor *input, ICLTensor *output, build_opts.add_option_if(multi_access_x, "-DVEC_SIZE_OUT=" + support::cpp11::to_string(num_elems_processed_per_iteration_x)); build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X_IN=" + support::cpp11::to_string(std::max(_input->info()->dimension(0) - _num_elems_processed_per_iteration_input_x, 0))); build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X_OUT=" + support::cpp11::to_string(std::max(output_width_x - num_elems_processed_per_iteration_x, 0))); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("upsample_layer_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options())); + _kernel = create_kernel(compile_context, "upsample_layer_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()); ICLKernel::configure_internal(win); } diff --git a/src/core/CL/kernels/CLWarpAffineKernel.cpp b/src/core/CL/kernels/CLWarpAffineKernel.cpp index 4aaa49128c..43bd34fcea 100644 --- a/src/core/CL/kernels/CLWarpAffineKernel.cpp +++ b/src/core/CL/kernels/CLWarpAffineKernel.cpp @@ -60,6 +60,11 @@ BorderSize CLWarpAffineKernel::border_size() const } void CLWarpAffineKernel::configure(const ICLTensor *input, ICLTensor *output, const std::array &matrix, InterpolationPolicy policy) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, matrix, policy); +} + +void CLWarpAffineKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array &matrix, InterpolationPolicy policy) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); @@ -77,7 +82,7 @@ void CLWarpAffineKernel::configure(const ICLTensor *input, ICLTensor *output, co std::string interpolation_name = string_from_interpolation_policy(policy); std::transform(interpolation_name.begin(), interpolation_name.end(), interpolation_name.begin(), ::tolower); const std::string 
kernel_name = "warp_affine_" + interpolation_name; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, options)); + _kernel = create_kernel(compile_context, kernel_name, options); // Set static kernel arguments unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters diff --git a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp b/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp index e537aec058..3c47567203 100644 --- a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp +++ b/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -59,6 +59,11 @@ BorderSize CLWarpPerspectiveKernel::border_size() const } void CLWarpPerspectiveKernel::configure(const ICLTensor *input, ICLTensor *output, const std::array &matrix, InterpolationPolicy policy) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, matrix, policy); +} + +void CLWarpPerspectiveKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array &matrix, InterpolationPolicy policy) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); @@ -76,7 +81,7 @@ void CLWarpPerspectiveKernel::configure(const ICLTensor *input, ICLTensor *outpu std::string interpolation_name = string_from_interpolation_policy(policy); std::transform(interpolation_name.begin(), interpolation_name.end(), interpolation_name.begin(), ::tolower); std::string kernel_name = "warp_perspective_" + interpolation_name; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, options)); + _kernel = create_kernel(compile_context, kernel_name, options); // Set static kernel arguments unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters diff --git a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp index 373cbe51ba..a0db660414 100644 --- a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp +++ b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp @@ -78,6 +78,11 @@ CLWeightsReshapeKernel::CLWeightsReshapeKernel() } void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, biases, output, num_groups); +} + +void CLWeightsReshapeKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -102,7 +107,7 @@ void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor * build_opts.add_option_if(biases != nullptr, "-DHAS_BIAS"); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("reshape_to_columns", build_opts.options())); + _kernel = create_kernel(compile_context, "reshape_to_columns", build_opts.options()); // Configure window Window win = calculate_max_window(*input->info(), Steps()); diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp index 4d52f09b48..ea549e9f46 100644 --- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp @@ -95,6 +95,11 @@ Status 
CLWidthConcatenate2TensorsKernel::validate(const ITensorInfo *input1, con } void CLWidthConcatenate2TensorsKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output); +} + +void CLWidthConcatenate2TensorsKernel::configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), output->info())); @@ -128,7 +133,7 @@ void CLWidthConcatenate2TensorsKernel::configure(const ICLTensor *input1, const } // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("concatenate_width_x2", build_opts.options())); + _kernel = create_kernel(compile_context, "concatenate_width_x2", build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window(input1->info(), input2->info(), output->info()); diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp index 0673365d83..e1ec9d1344 100644 --- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp @@ -113,6 +113,12 @@ Status CLWidthConcatenate4TensorsKernel::validate(const ITensorInfo *input1, con } void CLWidthConcatenate4TensorsKernel::configure(const ICLTensor *input1, const ICLTensor *input2, const ICLTensor *input3, const ICLTensor *input4, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input1, input2, input3, input4, output); +} + +void CLWidthConcatenate4TensorsKernel::configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, const ICLTensor *input3, const ICLTensor *input4, + ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, input3, input4, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), input3->info(), input4->info(), output->info())); @@ -156,7 +162,7 @@ void CLWidthConcatenate4TensorsKernel::configure(const ICLTensor *input1, const } // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("concatenate_width_x4", build_opts.options())); + _kernel = create_kernel(compile_context, "concatenate_width_x4", build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window(input1->info(), input2->info(), input3->info(), input4->info(), output->info()); diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp index eaefe666f2..9ff373b18d 100644 --- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp @@ -91,6 +91,11 @@ Status CLWidthConcatenateLayerKernel::validate(const ITensorInfo *input, unsigne } void CLWidthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned int width_offset, ICLTensor *output) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, width_offset, output); +} + +void CLWidthConcatenateLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int width_offset, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), width_offset, output->info())); @@ -118,7 +123,7 @@ void CLWidthConcatenateLayerKernel::configure(const ICLTensor 
*input, unsigned i } // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("concatenate_width", build_opts.options())); + _kernel = create_kernel(compile_context, "concatenate_width", build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window(input->info(), width_offset, output->info()); ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); diff --git a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp index b4135f26d5..38649126b7 100644 --- a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp +++ b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp @@ -100,6 +100,11 @@ CLWinogradFilterTransformKernel::CLWinogradFilterTransformKernel() } void CLWinogradFilterTransformKernel::configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, winograd_info); +} + +void CLWinogradFilterTransformKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -119,7 +124,7 @@ void CLWinogradFilterTransformKernel::configure(const ICLTensor *input, ICLTenso // Create kernel std::string kernel_name = "winograd_filter_transform_" + output_tile_size.to_string() + "_" + kernel_size.to_string() + "_" + lower_string(string_from_data_layout(input->info()->data_layout())); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); _input = input; _output = output; diff --git a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp index 5b76a3d24f..cf882ae9ac 100644 --- a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp +++ b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp @@ -109,6 +109,11 @@ BorderSize CLWinogradInputTransformKernel::border_size() const } void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, winograd_info); +} + +void CLWinogradInputTransformKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), winograd_info)); @@ -195,7 +200,7 @@ void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor kernel_name += support::cpp11::to_string(_step_z); kernel_name += "_" + lower_string(string_from_data_layout(_data_layout)); - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Create window and update padding auto win_config = validate_and_configure_window(input->info(), output->info(), winograd_info); diff --git a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp index 0bd3b28d73..f08b5ac7c8 100644 --- a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp +++ b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp @@ -136,6 +136,12 @@ CLWinogradOutputTransformKernel::CLWinogradOutputTransformKernel() } void 
CLWinogradOutputTransformKernel::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, winograd_info, act_info); +} + +void CLWinogradOutputTransformKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info, + const ActivationLayerInfo &act_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -188,7 +194,7 @@ void CLWinogradOutputTransformKernel::configure(const ICLTensor *input, const IC // Create kernel std::string kernel_name = "winograd_output_transform_" + output_tile_size.to_string() + "_" + kernel_size.to_string() + "_" + lower_string(string_from_data_layout(winograd_info.output_data_layout)); - _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Configure kernel window auto win_config = validate_and_configure_window(input->info(), (bias != nullptr ? bias->info() : nullptr), output->info(), winograd_info.output_tile_size); diff --git a/src/core/CL/kernels/CLYOLOLayerKernel.cpp b/src/core/CL/kernels/CLYOLOLayerKernel.cpp index f634d912eb..ee119233a4 100644 --- a/src/core/CL/kernels/CLYOLOLayerKernel.cpp +++ b/src/core/CL/kernels/CLYOLOLayerKernel.cpp @@ -101,6 +101,11 @@ CLYOLOLayerKernel::CLYOLOLayerKernel() } void CLYOLOLayerKernel::configure(ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, act_info, num_classes); +} + +void CLYOLOLayerKernel::configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes) { ARM_COMPUTE_ERROR_ON_NULLPTR(input); @@ -127,7 +132,7 @@ void CLYOLOLayerKernel::configure(ICLTensor *input, ICLTensor *output, const Act // Create kernel std::string kernel_name = std::string("yolo_layer_") + lower_string(string_from_data_layout(input->info()->data_layout())); - _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Make sure _kernel is initialized before calling the parent's configure _input = input; diff --git a/src/runtime/CL/functions/CLActivationLayer.cpp b/src/runtime/CL/functions/CLActivationLayer.cpp index 0b29fe1ced..9882145741 100644 --- a/src/runtime/CL/functions/CLActivationLayer.cpp +++ b/src/runtime/CL/functions/CLActivationLayer.cpp @@ -37,9 +37,7 @@ CLActivationLayer::CLActivationLayer(CLRuntimeContext *ctx) void CLActivationLayer::configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info) { - auto core_ctx = _ctx ? _ctx->core_runtime_context() : /* Legacy */ nullptr; - - auto k = arm_compute::support::cpp14::make_unique<CLActivationLayerKernel>(core_ctx); + auto k = arm_compute::support::cpp14::make_unique<CLActivationLayerKernel>(); k->configure(input, output, act_info); _kernel = std::move(k); } -- cgit v1.2.1
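
The hunks above all apply one mechanical pattern: each kernel keeps its existing configure() signature as a thin wrapper that forwards to a new overload taking a CLCompileContext, obtained from CLKernelLibrary::get().get_compile_context(), and the cl::Kernel is then built through the free create_kernel() helper so compilation runs against the caller-supplied context rather than directly through CLKernelLibrary::get().create_kernel(). The sketch below shows that pattern in isolation on a hypothetical CLExampleKernel; the class name, the "example_kernel" program name and the -DVEC_SIZE build option are illustrative only, while the helpers (CLBuildOptions, create_kernel(), calculate_max_window(), ICLKernel::configure_internal()) are the same ones used in the hunks above.

void CLExampleKernel::configure(const ICLTensor *input, ICLTensor *output)
{
    // Legacy entry point: delegate to the new overload using the library's default compile context.
    configure(CLKernelLibrary::get().get_compile_context(), input, output);
}

void CLExampleKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);

    _input  = input;
    _output = output;

    // Build options are assembled exactly as before (illustrative option).
    CLBuildOptions build_opts;
    build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(4));

    // New interface: compile the program against the caller-supplied context
    // instead of the global CLKernelLibrary instance.
    _kernel = create_kernel(compile_context, "example_kernel", build_opts.options());

    // Window configuration is unchanged by this refactoring.
    Window win = calculate_max_window(*output->info(), Steps());
    ICLKernel::configure_internal(win);
}

The CLActivationLayer.cpp hunk at the end is the runtime-side counterpart: the kernel is now default-constructed and the core runtime context is no longer threaded through at this level, so the kernel falls back to the default compile context via its legacy configure() overload.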