From bef7fa27b0d231a8649952f60808132d109b6345 Mon Sep 17 00:00:00 2001 From: Sang-Hoon Park Date: Wed, 21 Oct 2020 15:58:54 +0100 Subject: COMPMID-3639: (3RDPARTY_UPDATE) Move CL kernels to src Change-Id: I10d27db788e5086adae1841e3e2441cd9b76ef84 Signed-off-by: Sang-Hoon Park Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4310 Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp | 4 +- src/core/CL/kernels/CLAbsoluteDifferenceKernel.h | 79 ++++++++ src/core/CL/kernels/CLAccumulateKernel.cpp | 2 +- src/core/CL/kernels/CLAccumulateKernel.h | 114 +++++++++++ src/core/CL/kernels/CLActivationLayerKernel.cpp | 2 +- src/core/CL/kernels/CLActivationLayerKernel.h | 77 +++++++ src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp | 2 +- src/core/CL/kernels/CLArgMinMaxLayerKernel.h | 106 ++++++++++ .../CL/kernels/CLBatchConcatenateLayerKernel.cpp | 2 +- .../CL/kernels/CLBatchConcatenateLayerKernel.h | 82 ++++++++ .../CL/kernels/CLBatchNormalizationLayerKernel.cpp | 2 +- .../CL/kernels/CLBatchNormalizationLayerKernel.h | 120 +++++++++++ src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp | 2 +- src/core/CL/kernels/CLBatchToSpaceLayerKernel.h | 111 ++++++++++ src/core/CL/kernels/CLBitwiseAndKernel.cpp | 2 +- src/core/CL/kernels/CLBitwiseAndKernel.h | 76 +++++++ src/core/CL/kernels/CLBitwiseNotKernel.cpp | 2 +- src/core/CL/kernels/CLBitwiseNotKernel.h | 56 ++++++ src/core/CL/kernels/CLBitwiseOrKernel.cpp | 2 +- src/core/CL/kernels/CLBitwiseOrKernel.h | 76 +++++++ src/core/CL/kernels/CLBitwiseXorKernel.cpp | 2 +- src/core/CL/kernels/CLBitwiseXorKernel.h | 76 +++++++ .../CL/kernels/CLBoundingBoxTransformKernel.cpp | 2 +- src/core/CL/kernels/CLBoundingBoxTransformKernel.h | 99 +++++++++ src/core/CL/kernels/CLBox3x3Kernel.cpp | 2 +- src/core/CL/kernels/CLBox3x3Kernel.h | 59 ++++++ src/core/CL/kernels/CLCannyEdgeKernel.cpp | 2 +- src/core/CL/kernels/CLCannyEdgeKernel.h | 185 +++++++++++++++++ src/core/CL/kernels/CLChannelCombineKernel.cpp | 2 +- src/core/CL/kernels/CLChannelCombineKernel.h | 102 ++++++++++ src/core/CL/kernels/CLChannelExtractKernel.cpp | 2 +- src/core/CL/kernels/CLChannelExtractKernel.h | 95 +++++++++ .../CL/kernels/CLChannelShuffleLayerKernel.cpp | 2 +- src/core/CL/kernels/CLChannelShuffleLayerKernel.h | 82 ++++++++ src/core/CL/kernels/CLCol2ImKernel.cpp | 2 +- src/core/CL/kernels/CLCol2ImKernel.h | 106 ++++++++++ src/core/CL/kernels/CLColorConvertKernel.cpp | 2 +- src/core/CL/kernels/CLColorConvertKernel.h | 121 +++++++++++ src/core/CL/kernels/CLComparisonKernel.cpp | 2 +- src/core/CL/kernels/CLComparisonKernel.h | 89 ++++++++ .../CLConvertFullyConnectedWeightsKernel.cpp | 2 +- .../kernels/CLConvertFullyConnectedWeightsKernel.h | 90 +++++++++ src/core/CL/kernels/CLConvolutionKernel.cpp | 4 +- src/core/CL/kernels/CLConvolutionKernel.h | 224 +++++++++++++++++++++ src/core/CL/kernels/CLCopyKernel.cpp | 2 +- src/core/CL/kernels/CLCopyKernel.h | 83 ++++++++ src/core/CL/kernels/CLCropKernel.cpp | 2 +- src/core/CL/kernels/CLCropKernel.h | 103 ++++++++++ .../kernels/CLDeconvolutionLayerUpsampleKernel.cpp | 2 +- .../kernels/CLDeconvolutionLayerUpsampleKernel.h | 86 ++++++++ .../kernels/CLDeconvolutionReshapeOutputKernel.cpp | 2 +- .../kernels/CLDeconvolutionReshapeOutputKernel.h | 106 ++++++++++ .../CL/kernels/CLDepthConcatenateLayerKernel.cpp | 2 +- .../CL/kernels/CLDepthConcatenateLayerKernel.h | 80 ++++++++ src/core/CL/kernels/CLDepthConvertLayerKernel.cpp | 2 +- 
src/core/CL/kernels/CLDepthConvertLayerKernel.h | 91 +++++++++ src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp | 2 +- src/core/CL/kernels/CLDepthToSpaceLayerKernel.h | 84 ++++++++ .../CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp | 4 +- .../CLDepthwiseConvolutionLayer3x3NCHWKernel.h | 114 +++++++++++ .../CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp | 4 +- .../CLDepthwiseConvolutionLayer3x3NHWCKernel.h | 113 +++++++++++ .../CLDepthwiseConvolutionLayerNativeKernel.cpp | 4 +- .../CLDepthwiseConvolutionLayerNativeKernel.h | 131 ++++++++++++ ...pthwiseConvolutionLayerReshapeWeightsKernel.cpp | 4 +- ...DepthwiseConvolutionLayerReshapeWeightsKernel.h | 85 ++++++++ .../CL/kernels/CLDequantizationLayerKernel.cpp | 2 +- src/core/CL/kernels/CLDequantizationLayerKernel.h | 79 ++++++++ src/core/CL/kernels/CLDerivativeKernel.cpp | 2 +- src/core/CL/kernels/CLDerivativeKernel.h | 83 ++++++++ src/core/CL/kernels/CLDilateKernel.cpp | 2 +- src/core/CL/kernels/CLDilateKernel.h | 59 ++++++ .../CL/kernels/CLDirectConvolutionLayerKernel.cpp | 2 +- .../CL/kernels/CLDirectConvolutionLayerKernel.h | 125 ++++++++++++ .../CL/kernels/CLElementWiseUnaryLayerKernel.cpp | 2 +- .../CL/kernels/CLElementWiseUnaryLayerKernel.h | 66 ++++++ .../CL/kernels/CLElementwiseOperationKernel.cpp | 2 +- src/core/CL/kernels/CLElementwiseOperationKernel.h | 212 +++++++++++++++++++ src/core/CL/kernels/CLErodeKernel.cpp | 2 +- src/core/CL/kernels/CLErodeKernel.h | 59 ++++++ src/core/CL/kernels/CLFFTDigitReverseKernel.cpp | 2 +- src/core/CL/kernels/CLFFTDigitReverseKernel.h | 89 ++++++++ src/core/CL/kernels/CLFFTRadixStageKernel.cpp | 2 +- src/core/CL/kernels/CLFFTRadixStageKernel.h | 97 +++++++++ src/core/CL/kernels/CLFFTScaleKernel.cpp | 2 +- src/core/CL/kernels/CLFFTScaleKernel.h | 86 ++++++++ src/core/CL/kernels/CLFastCornersKernel.cpp | 2 +- src/core/CL/kernels/CLFastCornersKernel.h | 133 ++++++++++++ src/core/CL/kernels/CLFillBorderKernel.cpp | 2 +- src/core/CL/kernels/CLFillBorderKernel.h | 96 +++++++++ src/core/CL/kernels/CLFlattenLayerKernel.cpp | 2 +- src/core/CL/kernels/CLFlattenLayerKernel.h | 83 ++++++++ src/core/CL/kernels/CLFloorKernel.cpp | 2 +- src/core/CL/kernels/CLFloorKernel.h | 81 ++++++++ .../CL/kernels/CLFuseBatchNormalizationKernel.cpp | 2 +- .../CL/kernels/CLFuseBatchNormalizationKernel.h | 126 ++++++++++++ .../CLGEMMLowpMatrixMultiplyNativeKernel.cpp | 2 +- .../kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h | 108 ++++++++++ .../CLGEMMLowpMatrixMultiplyReshapedKernel.cpp | 2 +- .../CLGEMMLowpMatrixMultiplyReshapedKernel.h | 123 +++++++++++ ...GEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp | 16 +- ...CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h | 155 ++++++++++++++ .../kernels/CLGEMMLowpOffsetContributionKernel.cpp | 2 +- .../kernels/CLGEMMLowpOffsetContributionKernel.h | 116 +++++++++++ ...GEMMLowpOffsetContributionOutputStageKernel.cpp | 2 +- ...CLGEMMLowpOffsetContributionOutputStageKernel.h | 136 +++++++++++++ ...owpQuantizeDownInt32ScaleByFixedPointKernel.cpp | 2 +- ...MLowpQuantizeDownInt32ScaleByFixedPointKernel.h | 89 ++++++++ ...GEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp | 2 +- ...CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h | 101 ++++++++++ .../CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp | 2 +- .../CLGEMMLowpQuantizeDownInt32ScaleKernel.h | 102 ++++++++++ src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp | 2 +- src/core/CL/kernels/CLGEMMLowpReductionKernel.h | 176 ++++++++++++++++ src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp | 2 +- src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h | 122 
+++++++++++ .../kernels/CLGEMMMatrixMultiplyNativeKernel.cpp | 2 +- .../CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h | 127 ++++++++++++ .../kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp | 2 +- .../kernels/CLGEMMMatrixMultiplyReshapedKernel.h | 188 +++++++++++++++++ .../CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp | 6 +- .../CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h | 168 ++++++++++++++++ .../kernels/CLGEMMMatrixVectorMultiplyKernel.cpp | 2 +- .../CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h | 84 ++++++++ .../CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp | 2 +- src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h | 105 ++++++++++ .../CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp | 2 +- src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h | 135 +++++++++++++ src/core/CL/kernels/CLGatherKernel.cpp | 2 +- src/core/CL/kernels/CLGatherKernel.h | 89 ++++++++ src/core/CL/kernels/CLGaussian3x3Kernel.cpp | 2 +- src/core/CL/kernels/CLGaussian3x3Kernel.h | 59 ++++++ src/core/CL/kernels/CLGaussian5x5Kernel.cpp | 2 +- src/core/CL/kernels/CLGaussian5x5Kernel.h | 83 ++++++++ src/core/CL/kernels/CLGaussianPyramidKernel.cpp | 2 +- src/core/CL/kernels/CLGaussianPyramidKernel.h | 111 ++++++++++ .../CL/kernels/CLGenerateProposalsLayerKernel.cpp | 2 +- .../CL/kernels/CLGenerateProposalsLayerKernel.h | 85 ++++++++ src/core/CL/kernels/CLHOGDescriptorKernel.cpp | 2 +- src/core/CL/kernels/CLHOGDescriptorKernel.h | 122 +++++++++++ src/core/CL/kernels/CLHOGDetectorKernel.cpp | 2 +- src/core/CL/kernels/CLHOGDetectorKernel.h | 96 +++++++++ src/core/CL/kernels/CLHarrisCornersKernel.cpp | 2 +- src/core/CL/kernels/CLHarrisCornersKernel.h | 100 +++++++++ .../CL/kernels/CLHeightConcatenateLayerKernel.cpp | 2 +- .../CL/kernels/CLHeightConcatenateLayerKernel.h | 77 +++++++ src/core/CL/kernels/CLHistogramKernel.cpp | 2 +- src/core/CL/kernels/CLHistogramKernel.h | 111 ++++++++++ src/core/CL/kernels/CLIm2ColKernel.cpp | 2 +- src/core/CL/kernels/CLIm2ColKernel.h | 136 +++++++++++++ .../kernels/CLInstanceNormalizationLayerKernel.cpp | 2 +- .../kernels/CLInstanceNormalizationLayerKernel.h | 90 +++++++++ src/core/CL/kernels/CLIntegralImageKernel.cpp | 2 +- src/core/CL/kernels/CLIntegralImageKernel.h | 86 ++++++++ src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp | 2 +- src/core/CL/kernels/CLL2NormalizeLayerKernel.h | 100 +++++++++ src/core/CL/kernels/CLLKTrackerKernel.cpp | 2 +- src/core/CL/kernels/CLLKTrackerKernel.h | 206 +++++++++++++++++++ .../CLLocallyConnectedMatrixMultiplyKernel.cpp | 2 +- .../CLLocallyConnectedMatrixMultiplyKernel.h | 85 ++++++++ src/core/CL/kernels/CLMagnitudePhaseKernel.cpp | 2 +- src/core/CL/kernels/CLMagnitudePhaseKernel.h | 90 +++++++++ src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp | 2 +- src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h | 86 ++++++++ src/core/CL/kernels/CLMeanStdDevKernel.cpp | 2 +- src/core/CL/kernels/CLMeanStdDevKernel.h | 98 +++++++++ .../CL/kernels/CLMeanStdDevNormalizationKernel.cpp | 2 +- .../CL/kernels/CLMeanStdDevNormalizationKernel.h | 90 +++++++++ src/core/CL/kernels/CLMedian3x3Kernel.cpp | 2 +- src/core/CL/kernels/CLMedian3x3Kernel.h | 59 ++++++ src/core/CL/kernels/CLMemsetKernel.cpp | 2 +- src/core/CL/kernels/CLMemsetKernel.h | 85 ++++++++ src/core/CL/kernels/CLMinMaxLayerKernel.cpp | 2 +- src/core/CL/kernels/CLMinMaxLayerKernel.h | 87 ++++++++ src/core/CL/kernels/CLMinMaxLocationKernel.cpp | 2 +- src/core/CL/kernels/CLMinMaxLocationKernel.h | 124 ++++++++++++ src/core/CL/kernels/CLNonLinearFilterKernel.cpp | 2 +- src/core/CL/kernels/CLNonLinearFilterKernel.h | 77 +++++++ 
.../CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp | 2 +- .../CL/kernels/CLNonMaximaSuppression3x3Kernel.h | 60 ++++++ src/core/CL/kernels/CLNormalizationLayerKernel.cpp | 2 +- src/core/CL/kernels/CLNormalizationLayerKernel.h | 90 +++++++++ .../CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp | 2 +- .../CL/kernels/CLNormalizePlanarYUVLayerKernel.h | 94 +++++++++ src/core/CL/kernels/CLPadLayerKernel.cpp | 2 +- src/core/CL/kernels/CLPadLayerKernel.h | 96 +++++++++ src/core/CL/kernels/CLPermuteKernel.cpp | 2 +- src/core/CL/kernels/CLPermuteKernel.h | 90 +++++++++ .../CL/kernels/CLPixelWiseMultiplicationKernel.cpp | 2 +- .../CL/kernels/CLPixelWiseMultiplicationKernel.h | 196 ++++++++++++++++++ src/core/CL/kernels/CLPoolingLayerKernel.cpp | 6 +- src/core/CL/kernels/CLPoolingLayerKernel.h | 96 +++++++++ src/core/CL/kernels/CLPriorBoxLayerKernel.cpp | 2 +- src/core/CL/kernels/CLPriorBoxLayerKernel.h | 99 +++++++++ .../CL/kernels/CLQLSTMLayerNormalizationKernel.cpp | 2 +- .../CL/kernels/CLQLSTMLayerNormalizationKernel.h | 88 ++++++++ src/core/CL/kernels/CLQuantizationLayerKernel.cpp | 2 +- src/core/CL/kernels/CLQuantizationLayerKernel.h | 86 ++++++++ src/core/CL/kernels/CLROIAlignLayerKernel.cpp | 2 +- src/core/CL/kernels/CLROIAlignLayerKernel.h | 110 ++++++++++ src/core/CL/kernels/CLROIPoolingLayerKernel.cpp | 2 +- src/core/CL/kernels/CLROIPoolingLayerKernel.h | 92 +++++++++ src/core/CL/kernels/CLRangeKernel.cpp | 2 +- src/core/CL/kernels/CLRangeKernel.h | 92 +++++++++ src/core/CL/kernels/CLReductionOperationKernel.cpp | 2 +- src/core/CL/kernels/CLReductionOperationKernel.h | 99 +++++++++ src/core/CL/kernels/CLRemapKernel.cpp | 2 +- src/core/CL/kernels/CLRemapKernel.h | 81 ++++++++ src/core/CL/kernels/CLReorgLayerKernel.cpp | 2 +- src/core/CL/kernels/CLReorgLayerKernel.h | 90 +++++++++ src/core/CL/kernels/CLReshapeLayerKernel.cpp | 2 +- src/core/CL/kernels/CLReshapeLayerKernel.h | 59 ++++++ src/core/CL/kernels/CLReverseKernel.cpp | 2 +- src/core/CL/kernels/CLReverseKernel.h | 84 ++++++++ src/core/CL/kernels/CLScaleKernel.cpp | 4 +- src/core/CL/kernels/CLScaleKernel.h | 93 +++++++++ src/core/CL/kernels/CLScharr3x3Kernel.cpp | 2 +- src/core/CL/kernels/CLScharr3x3Kernel.h | 97 +++++++++ src/core/CL/kernels/CLSelectKernel.cpp | 2 +- src/core/CL/kernels/CLSelectKernel.h | 94 +++++++++ src/core/CL/kernels/CLSobel3x3Kernel.cpp | 2 +- src/core/CL/kernels/CLSobel3x3Kernel.h | 83 ++++++++ src/core/CL/kernels/CLSobel5x5Kernel.cpp | 2 +- src/core/CL/kernels/CLSobel5x5Kernel.h | 139 +++++++++++++ src/core/CL/kernels/CLSobel7x7Kernel.cpp | 2 +- src/core/CL/kernels/CLSobel7x7Kernel.h | 139 +++++++++++++ src/core/CL/kernels/CLSoftmaxLayerKernel.cpp | 2 +- src/core/CL/kernels/CLSoftmaxLayerKernel.h | 158 +++++++++++++++ src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp | 2 +- src/core/CL/kernels/CLSpaceToBatchLayerKernel.h | 122 +++++++++++ src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp | 2 +- src/core/CL/kernels/CLSpaceToDepthLayerKernel.h | 84 ++++++++ src/core/CL/kernels/CLStackLayerKernel.cpp | 2 +- src/core/CL/kernels/CLStackLayerKernel.h | 101 ++++++++++ src/core/CL/kernels/CLStridedSliceKernel.cpp | 2 +- src/core/CL/kernels/CLStridedSliceKernel.h | 79 ++++++++ src/core/CL/kernels/CLTableLookupKernel.cpp | 2 +- src/core/CL/kernels/CLTableLookupKernel.h | 55 +++++ src/core/CL/kernels/CLThresholdKernel.cpp | 2 +- src/core/CL/kernels/CLThresholdKernel.h | 57 ++++++ src/core/CL/kernels/CLTileKernel.cpp | 2 +- src/core/CL/kernels/CLTileKernel.h | 88 ++++++++ src/core/CL/kernels/CLTransposeKernel.cpp | 2 +- 
src/core/CL/kernels/CLTransposeKernel.h | 64 ++++++ src/core/CL/kernels/CLUpsampleLayerKernel.cpp | 2 +- src/core/CL/kernels/CLUpsampleLayerKernel.h | 89 ++++++++ src/core/CL/kernels/CLWarpAffineKernel.cpp | 2 +- src/core/CL/kernels/CLWarpAffineKernel.h | 62 ++++++ src/core/CL/kernels/CLWarpPerspectiveKernel.cpp | 2 +- src/core/CL/kernels/CLWarpPerspectiveKernel.h | 59 ++++++ src/core/CL/kernels/CLWeightsReshapeKernel.cpp | 2 +- src/core/CL/kernels/CLWeightsReshapeKernel.h | 121 +++++++++++ .../kernels/CLWidthConcatenate2TensorsKernel.cpp | 2 +- .../CL/kernels/CLWidthConcatenate2TensorsKernel.h | 73 +++++++ .../kernels/CLWidthConcatenate4TensorsKernel.cpp | 2 +- .../CL/kernels/CLWidthConcatenate4TensorsKernel.h | 77 +++++++ .../CL/kernels/CLWidthConcatenateLayerKernel.cpp | 2 +- .../CL/kernels/CLWidthConcatenateLayerKernel.h | 74 +++++++ .../CL/kernels/CLWinogradFilterTransformKernel.cpp | 4 +- .../CL/kernels/CLWinogradFilterTransformKernel.h | 115 +++++++++++ .../CL/kernels/CLWinogradInputTransformKernel.cpp | 2 +- .../CL/kernels/CLWinogradInputTransformKernel.h | 121 +++++++++++ .../CL/kernels/CLWinogradOutputTransformKernel.cpp | 2 +- .../CL/kernels/CLWinogradOutputTransformKernel.h | 127 ++++++++++++ src/core/CL/kernels/CLYOLOLayerKernel.cpp | 2 +- src/core/CL/kernels/CLYOLOLayerKernel.h | 98 +++++++++ .../ICLDepthwiseConvolutionLayer3x3Kernel.h | 105 ++++++++++ 267 files changed, 13670 insertions(+), 152 deletions(-) create mode 100644 src/core/CL/kernels/CLAbsoluteDifferenceKernel.h create mode 100644 src/core/CL/kernels/CLAccumulateKernel.h create mode 100644 src/core/CL/kernels/CLActivationLayerKernel.h create mode 100644 src/core/CL/kernels/CLArgMinMaxLayerKernel.h create mode 100644 src/core/CL/kernels/CLBatchConcatenateLayerKernel.h create mode 100644 src/core/CL/kernels/CLBatchNormalizationLayerKernel.h create mode 100644 src/core/CL/kernels/CLBatchToSpaceLayerKernel.h create mode 100644 src/core/CL/kernels/CLBitwiseAndKernel.h create mode 100644 src/core/CL/kernels/CLBitwiseNotKernel.h create mode 100644 src/core/CL/kernels/CLBitwiseOrKernel.h create mode 100644 src/core/CL/kernels/CLBitwiseXorKernel.h create mode 100644 src/core/CL/kernels/CLBoundingBoxTransformKernel.h create mode 100644 src/core/CL/kernels/CLBox3x3Kernel.h create mode 100644 src/core/CL/kernels/CLCannyEdgeKernel.h create mode 100644 src/core/CL/kernels/CLChannelCombineKernel.h create mode 100644 src/core/CL/kernels/CLChannelExtractKernel.h create mode 100644 src/core/CL/kernels/CLChannelShuffleLayerKernel.h create mode 100644 src/core/CL/kernels/CLCol2ImKernel.h create mode 100644 src/core/CL/kernels/CLColorConvertKernel.h create mode 100644 src/core/CL/kernels/CLComparisonKernel.h create mode 100644 src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h create mode 100644 src/core/CL/kernels/CLConvolutionKernel.h create mode 100644 src/core/CL/kernels/CLCopyKernel.h create mode 100644 src/core/CL/kernels/CLCropKernel.h create mode 100644 src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h create mode 100644 src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h create mode 100644 src/core/CL/kernels/CLDepthConcatenateLayerKernel.h create mode 100644 src/core/CL/kernels/CLDepthConvertLayerKernel.h create mode 100644 src/core/CL/kernels/CLDepthToSpaceLayerKernel.h create mode 100644 src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h create mode 100644 src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h create mode 100644 
src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h create mode 100644 src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h create mode 100644 src/core/CL/kernels/CLDequantizationLayerKernel.h create mode 100644 src/core/CL/kernels/CLDerivativeKernel.h create mode 100644 src/core/CL/kernels/CLDilateKernel.h create mode 100644 src/core/CL/kernels/CLDirectConvolutionLayerKernel.h create mode 100644 src/core/CL/kernels/CLElementWiseUnaryLayerKernel.h create mode 100644 src/core/CL/kernels/CLElementwiseOperationKernel.h create mode 100644 src/core/CL/kernels/CLErodeKernel.h create mode 100644 src/core/CL/kernels/CLFFTDigitReverseKernel.h create mode 100644 src/core/CL/kernels/CLFFTRadixStageKernel.h create mode 100644 src/core/CL/kernels/CLFFTScaleKernel.h create mode 100644 src/core/CL/kernels/CLFastCornersKernel.h create mode 100644 src/core/CL/kernels/CLFillBorderKernel.h create mode 100644 src/core/CL/kernels/CLFlattenLayerKernel.h create mode 100644 src/core/CL/kernels/CLFloorKernel.h create mode 100644 src/core/CL/kernels/CLFuseBatchNormalizationKernel.h create mode 100644 src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h create mode 100644 src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h create mode 100644 src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h create mode 100644 src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h create mode 100644 src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h create mode 100644 src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h create mode 100644 src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h create mode 100644 src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h create mode 100644 src/core/CL/kernels/CLGEMMLowpReductionKernel.h create mode 100644 src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h create mode 100644 src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h create mode 100644 src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h create mode 100644 src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h create mode 100644 src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h create mode 100644 src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h create mode 100644 src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h create mode 100644 src/core/CL/kernels/CLGatherKernel.h create mode 100644 src/core/CL/kernels/CLGaussian3x3Kernel.h create mode 100644 src/core/CL/kernels/CLGaussian5x5Kernel.h create mode 100644 src/core/CL/kernels/CLGaussianPyramidKernel.h create mode 100644 src/core/CL/kernels/CLGenerateProposalsLayerKernel.h create mode 100644 src/core/CL/kernels/CLHOGDescriptorKernel.h create mode 100644 src/core/CL/kernels/CLHOGDetectorKernel.h create mode 100644 src/core/CL/kernels/CLHarrisCornersKernel.h create mode 100644 src/core/CL/kernels/CLHeightConcatenateLayerKernel.h create mode 100644 src/core/CL/kernels/CLHistogramKernel.h create mode 100644 src/core/CL/kernels/CLIm2ColKernel.h create mode 100644 src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h create mode 100644 src/core/CL/kernels/CLIntegralImageKernel.h create mode 100644 src/core/CL/kernels/CLL2NormalizeLayerKernel.h create mode 100644 src/core/CL/kernels/CLLKTrackerKernel.h create mode 100644 src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h create mode 100644 src/core/CL/kernels/CLMagnitudePhaseKernel.h create mode 100644 src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h create mode 100644 
src/core/CL/kernels/CLMeanStdDevKernel.h create mode 100644 src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h create mode 100644 src/core/CL/kernels/CLMedian3x3Kernel.h create mode 100644 src/core/CL/kernels/CLMemsetKernel.h create mode 100644 src/core/CL/kernels/CLMinMaxLayerKernel.h create mode 100644 src/core/CL/kernels/CLMinMaxLocationKernel.h create mode 100644 src/core/CL/kernels/CLNonLinearFilterKernel.h create mode 100644 src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h create mode 100644 src/core/CL/kernels/CLNormalizationLayerKernel.h create mode 100644 src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h create mode 100644 src/core/CL/kernels/CLPadLayerKernel.h create mode 100644 src/core/CL/kernels/CLPermuteKernel.h create mode 100644 src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h create mode 100644 src/core/CL/kernels/CLPoolingLayerKernel.h create mode 100644 src/core/CL/kernels/CLPriorBoxLayerKernel.h create mode 100644 src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h create mode 100644 src/core/CL/kernels/CLQuantizationLayerKernel.h create mode 100644 src/core/CL/kernels/CLROIAlignLayerKernel.h create mode 100644 src/core/CL/kernels/CLROIPoolingLayerKernel.h create mode 100644 src/core/CL/kernels/CLRangeKernel.h create mode 100644 src/core/CL/kernels/CLReductionOperationKernel.h create mode 100644 src/core/CL/kernels/CLRemapKernel.h create mode 100644 src/core/CL/kernels/CLReorgLayerKernel.h create mode 100644 src/core/CL/kernels/CLReshapeLayerKernel.h create mode 100644 src/core/CL/kernels/CLReverseKernel.h create mode 100644 src/core/CL/kernels/CLScaleKernel.h create mode 100644 src/core/CL/kernels/CLScharr3x3Kernel.h create mode 100644 src/core/CL/kernels/CLSelectKernel.h create mode 100644 src/core/CL/kernels/CLSobel3x3Kernel.h create mode 100644 src/core/CL/kernels/CLSobel5x5Kernel.h create mode 100644 src/core/CL/kernels/CLSobel7x7Kernel.h create mode 100644 src/core/CL/kernels/CLSoftmaxLayerKernel.h create mode 100644 src/core/CL/kernels/CLSpaceToBatchLayerKernel.h create mode 100644 src/core/CL/kernels/CLSpaceToDepthLayerKernel.h create mode 100644 src/core/CL/kernels/CLStackLayerKernel.h create mode 100644 src/core/CL/kernels/CLStridedSliceKernel.h create mode 100644 src/core/CL/kernels/CLTableLookupKernel.h create mode 100644 src/core/CL/kernels/CLThresholdKernel.h create mode 100644 src/core/CL/kernels/CLTileKernel.h create mode 100644 src/core/CL/kernels/CLTransposeKernel.h create mode 100644 src/core/CL/kernels/CLUpsampleLayerKernel.h create mode 100644 src/core/CL/kernels/CLWarpAffineKernel.h create mode 100644 src/core/CL/kernels/CLWarpPerspectiveKernel.h create mode 100644 src/core/CL/kernels/CLWeightsReshapeKernel.h create mode 100644 src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h create mode 100644 src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h create mode 100644 src/core/CL/kernels/CLWidthConcatenateLayerKernel.h create mode 100644 src/core/CL/kernels/CLWinogradFilterTransformKernel.h create mode 100644 src/core/CL/kernels/CLWinogradInputTransformKernel.h create mode 100644 src/core/CL/kernels/CLWinogradOutputTransformKernel.h create mode 100644 src/core/CL/kernels/CLYOLOLayerKernel.h create mode 100644 src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h (limited to 'src/core/CL/kernels') diff --git a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp index 29745beee7..76b60cb9f8 100644 --- a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp +++ 
b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp @@ -21,14 +21,14 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h" - #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Validate.h" + +#include "src/core/CL/kernels/CLAbsoluteDifferenceKernel.h" #include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.h b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.h new file mode 100644 index 0000000000..28f28fe44f --- /dev/null +++ b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H +#define ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the absolute difference kernel. + * + * Absolute difference is computed by: + * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f] + */ +class CLAbsoluteDifferenceKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLAbsoluteDifferenceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLAbsoluteDifferenceKernel(const CLAbsoluteDifferenceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLAbsoluteDifferenceKernel &operator=(const CLAbsoluteDifferenceKernel &) = delete; + /** Allow instances of this class to be moved */ + CLAbsoluteDifferenceKernel(CLAbsoluteDifferenceKernel &&) = default; + /** Allow instances of this class to be moved */ + CLAbsoluteDifferenceKernel &operator=(CLAbsoluteDifferenceKernel &&) = default; + /** Default destructor */ + ~CLAbsoluteDifferenceKernel() = default; + + /** Set the inputs and output images. + * + * @param[in] input1 Source tensor. Data types supported: U8/S16. + * @param[in] input2 Source tensor. Data types supported: U8/S16. + * @param[out] output Destination tensor. Data types supported: U8/S16. 
+ */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + /** Set the inputs and output images. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Source tensor. Data types supported: U8/S16. + * @param[in] input2 Source tensor. Data types supported: U8/S16. + * @param[out] output Destination tensor. Data types supported: U8/S16. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1. */ + const ICLTensor *_input2; /**< Source tensor 2. */ + ICLTensor *_output; /**< Destination tensor. */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H */ diff --git a/src/core/CL/kernels/CLAccumulateKernel.cpp b/src/core/CL/kernels/CLAccumulateKernel.cpp index f161906646..b0a8eba644 100644 --- a/src/core/CL/kernels/CLAccumulateKernel.cpp +++ b/src/core/CL/kernels/CLAccumulateKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLAccumulateKernel.h" +#include "src/core/CL/kernels/CLAccumulateKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLAccumulateKernel.h b/src/core/CL/kernels/CLAccumulateKernel.h new file mode 100644 index 0000000000..16a715319d --- /dev/null +++ b/src/core/CL/kernels/CLAccumulateKernel.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLACCUMULATEKERNEL_H +#define ARM_COMPUTE_CLACCUMULATEKERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the accumulate kernel. + * + * Accumulation is computed by: + * @f[ accum(x,y) = accum(x,y) + input(x,y) @f] + */ +class CLAccumulateKernel : public ICLSimple2DKernel +{ +public: + /** Set the input and accumulation tensors. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] accum Destination tensor. Data types supported: S16. + */ + void configure(const ICLTensor *input, ICLTensor *accum); + /** Set the input and accumulation tensors. 
+ * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] accum Destination tensor. Data types supported: S16. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *accum); +}; + +/** Interface for the accumulate weighted kernel. + * + * Weighted accumulation is computed: + * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f] + * + * Where @f$ 0 \le \alpha \le 1 @f$ + * Conceptually, the rounding for this is defined as: + * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f] +*/ +class CLAccumulateWeightedKernel : public ICLSimple2DKernel +{ +public: + /** Set the input and accumulation images, and the scale value. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32. + * @param[in,out] accum Accumulated tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input, float alpha, ICLTensor *accum); + /** Set the input and accumulation images, and the scale value. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32. + * @param[in,out] accum Accumulated tensor. Data types supported: U8. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, float alpha, ICLTensor *accum); +}; + +/** Interface for the accumulate squared kernel. + * + * The accumulation of squares is computed: + * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f] + * + * Where @f$ 0 \le shift \le 15 @f$ +*/ +class CLAccumulateSquaredKernel : public ICLSimple2DKernel +{ +public: + /** Set the input and accumulation tensors and the shift value. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32. + * @param[in,out] accum Accumulated tensor. Data types supported: S16. + */ + void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum); + /** Set the input and accumulation tensors and the shift value. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32. + * @param[in,out] accum Accumulated tensor. Data types supported: S16. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, uint32_t shift, ICLTensor *accum); +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLACCUMULATEKERNEL_H */ diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp index f0e3047796..8ddf8d8f9e 100644 --- a/src/core/CL/kernels/CLActivationLayerKernel.cpp +++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h" +#include "src/core/CL/kernels/CLActivationLayerKernel.h" #include "arm_compute/core/CL/CLCoreRuntimeContext.h" #include "arm_compute/core/CL/CLHelpers.h" diff --git a/src/core/CL/kernels/CLActivationLayerKernel.h b/src/core/CL/kernels/CLActivationLayerKernel.h new file mode 100644 index 0000000000..821418f835 --- /dev/null +++ b/src/core/CL/kernels/CLActivationLayerKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H +#define ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; +/** Interface for the activation layer kernel. */ +class CLActivationLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLActivationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLActivationLayerKernel(const CLActivationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLActivationLayerKernel &operator=(const CLActivationLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLActivationLayerKernel(CLActivationLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLActivationLayerKernel &operator=(CLActivationLayerKernel &&) = default; + /** Default destructor */ + ~CLActivationLayerKernel() = default; + /** Set the input and output tensor. + * + * @note If the output tensor is a nullptr, the activation function will be performed in-place + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] act_info Activation layer information. + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo act_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayerKernel + * + * @param[in] input Source tensor info. 
In case of @p output tensor info = nullptr, this tensor will store the result + * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. + * @param[in] output Destination tensor info. Data type supported: same as @p input + * @param[in] act_info Activation layer information. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; + +private: + bool _run_in_place; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp index b5a801a97f..0e6fc6599c 100644 --- a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h" +#include "src/core/CL/kernels/CLArgMinMaxLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLArgMinMaxLayerKernel.h b/src/core/CL/kernels/CLArgMinMaxLayerKernel.h new file mode 100644 index 0000000000..929677f905 --- /dev/null +++ b/src/core/CL/kernels/CLArgMinMaxLayerKernel.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H +#define ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the reduction operation kernel + * + * @note The default data type for an uninitialized output tensor is + * signed 32-bit integer (S32). It is the user's responsibility to check + * that the results do not overflow because the indices are computed + * in unsigned 32-bit (U32). 
+ */ +class CLArgMinMaxLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLArgMinMaxLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLArgMinMaxLayerKernel(const CLArgMinMaxLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLArgMinMaxLayerKernel &operator=(const CLArgMinMaxLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLArgMinMaxLayerKernel(CLArgMinMaxLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLArgMinMaxLayerKernel &operator=(CLArgMinMaxLayerKernel &&) = default; + /** Default destructor */ + ~CLArgMinMaxLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. + * @param[in] prev_output Destination tensor of the previous iterations of @ref CLArgMinMaxLayerKernel. Data types supported: U32/S32 + * Has to be nullptr for the first iteration + * @param[out] output Destination tensor. Data types supported: U32/S32 + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 + * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported. + */ + void configure(const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. + * @param[in] prev_output Destination tensor of the previous iterations of @ref CLArgMinMaxLayerKernel. Data types supported: U32/S32 + * Has to be nullptr for the first iteration + * @param[out] output Destination tensor. Data types supported: U32/S32 + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 + * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op); + + /** Static function to check if given info will lead to a valid configuration of @ref CLArgMinMaxLayerKernel. + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. + * @param[in] prev_output Destination tensor info of the previous iterations. Data types supported: U32/S32 + * Has to be nullptr for the first iteration + * @param[in] output Destination tensor info. Data types supported: U32/S32 + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 + * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *prev_output, const ITensorInfo *output, unsigned int axis, ReductionOperation op); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_prev_output; + ICLTensor *_output; + unsigned int _reduction_axis; + ReductionOperation _op; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp index 7a8c9ad0fb..7e9424f58b 100644 --- a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h" +#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.h b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.h new file mode 100644 index 0000000000..54a89eb243 --- /dev/null +++ b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H +#define ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the batch concatenate kernel. + * The input tensor will be concatenated into the output tensor. 
+ */ +class CLBatchConcatenateLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLBatchConcatenateLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBatchConcatenateLayerKernel(const CLBatchConcatenateLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBatchConcatenateLayerKernel &operator=(const CLBatchConcatenateLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBatchConcatenateLayerKernel(CLBatchConcatenateLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBatchConcatenateLayerKernel &operator=(CLBatchConcatenateLayerKernel &&) = default; + /** Default destructor */ + ~CLBatchConcatenateLayerKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[in] batch_offset The offset on axis # 3. + * @param[in,out] output Output tensor. Data types supported: Same as @p input. + * + * @note: The output tensor's low two dimensions can't be smaller than the input one's. + * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. + * + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLBatchConcatenateLayerKernel + * + * @param[in] input Input tensor info. Data types supported: All. + * @param[in] batch_offset The offset on axis # 3. + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; + +private: + unsigned int _batch_offset; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H */ diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp index 09b668d6cd..9aeca3bcfe 100644 --- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h" +#include "src/core/CL/kernels/CLBatchNormalizationLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.h new file mode 100644 index 0000000000..743f4a9594 --- /dev/null +++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H +#define ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the BatchNormalization layer kernel. + */ +class CLBatchNormalizationLayerKernel : public ICLKernel +{ +public: + /** Constructor */ + CLBatchNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBatchNormalizationLayerKernel(const CLBatchNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBatchNormalizationLayerKernel &operator=(const CLBatchNormalizationLayerKernel &) = delete; + /** Default Move Constructor. */ + CLBatchNormalizationLayerKernel(CLBatchNormalizationLayerKernel &&) = default; + /** Default move assignment operator */ + CLBatchNormalizationLayerKernel &operator=(CLBatchNormalizationLayerKernel &&) = default; + /** Default destructor */ + ~CLBatchNormalizationLayerKernel() = default; + + /** Set the input and output tensors. + * + * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place + * + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. + * 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input + * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. 
Data types supported: Same as @p input + * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. + */ + void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, const ICLTensor *gamma = nullptr, float epsilon = 0.001f, + ActivationLayerInfo act_info = ActivationLayerInfo()); + /** Set the input and output tensors. + * + * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. + * 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input + * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input + * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, + const ICLTensor *gamma = nullptr, float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref CLBatchNormalizationLayerKernel + * + * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result. + * 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. 
Data types supported: Same as @p input + * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input + * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const ITensorInfo *mean, const ITensorInfo *var, + const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr, + float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_input; + ICLTensor *_output; + const ICLTensor *_mean; + const ICLTensor *_var; + const ICLTensor *_beta; + const ICLTensor *_gamma; + float _epsilon; + bool _run_in_place; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp index e5997fb4d2..da41feb7b8 100644 --- a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h" +#include "src/core/CL/kernels/CLBatchToSpaceLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.h b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.h new file mode 100644 index 0000000000..131a43e59c --- /dev/null +++ b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
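For in-tree code that drives the batch normalization kernel documented above directly, the usual flow is configure, allocate, then enqueue. The sketch below is illustrative only: the tensor shapes, the in-place usage (output = nullptr), the fused ReLU and the run_batchnorm_example helper are assumptions, not taken from this patch; note that after this move the kernel header is included from src/ rather than arm_compute/.

// Illustrative only: configure and run the batch normalization kernel in-place
// with a fused ReLU. Shapes and data type are assumed values for the example.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLBatchNormalizationLayerKernel.h"

using namespace arm_compute;

void run_batchnorm_example()
{
    CLScheduler::get().default_init();

    CLTensor src, mean, var;
    src.allocator()->init(TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32)); // [width, height, FM]
    mean.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));          // one value per FM
    var.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));

    CLBatchNormalizationLayerKernel bn;
    // nullptr output -> in-place; beta/gamma omitted so their defaults (0 and 1) apply.
    bn.configure(&src, nullptr, &mean, &var, nullptr, nullptr, 0.001f,
                 ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    src.allocator()->allocate();
    mean.allocator()->allocate();
    var.allocator()->allocate();

    CLScheduler::get().enqueue(bn);
    CLScheduler::get().sync();
}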
+ */ +#ifndef ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H +#define ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the batch to space kernel */ +class CLBatchToSpaceLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLBatchToSpaceLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBatchToSpaceLayerKernel(const CLBatchToSpaceLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBatchToSpaceLayerKernel &operator=(const CLBatchToSpaceLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBatchToSpaceLayerKernel(CLBatchToSpaceLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBatchToSpaceLayerKernel &operator=(CLBatchToSpaceLayerKernel &&) = default; + /** Default destructor */ + ~CLBatchToSpaceLayerKernel() = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output); + /** Initialise the kernel's inputs and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output); + /** Initialise the kernel's inputs and output (Static block shape). + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output); + /** Initialise the kernel's inputs and output (Static block shape). + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayerKernel + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 + * @param[in] output Tensor output. 
Data types supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayerKernel (Static block shape). + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. + * @param[in] output Tensor output. Data types supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const int32_t block_shape_x, const int32_t block_shape_y, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Source tensor */ + const ICLTensor *_block_shape; /**< Block shape tensor */ + ICLTensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLBitwiseAndKernel.cpp b/src/core/CL/kernels/CLBitwiseAndKernel.cpp index 53a438dcf6..91a659284a 100644 --- a/src/core/CL/kernels/CLBitwiseAndKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseAndKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h" +#include "src/core/CL/kernels/CLBitwiseAndKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLBitwiseAndKernel.h b/src/core/CL/kernels/CLBitwiseAndKernel.h new file mode 100644 index 0000000000..01018ee09d --- /dev/null +++ b/src/core/CL/kernels/CLBitwiseAndKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLBITWISEANDKERNEL_H +#define ARM_COMPUTE_CLBITWISEANDKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the bitwise AND operation kernel. + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \land input2(x,y) @f] + */ +class CLBitwiseAndKernel : public ICLKernel +{ +public: + /** Default constructor. 
*/ + CLBitwiseAndKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBitwiseAndKernel(const CLBitwiseAndKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBitwiseAndKernel &operator=(const CLBitwiseAndKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBitwiseAndKernel(CLBitwiseAndKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBitwiseAndKernel &operator=(CLBitwiseAndKernel &&) = default; + /** Set the inputs and output images + * + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + /** Set the inputs and output images + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLBITWISEANDKERNEL_H */ diff --git a/src/core/CL/kernels/CLBitwiseNotKernel.cpp b/src/core/CL/kernels/CLBitwiseNotKernel.cpp index 08e4c54957..118bfe8139 100644 --- a/src/core/CL/kernels/CLBitwiseNotKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseNotKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLBitwiseNotKernel.h" +#include "src/core/CL/kernels/CLBitwiseNotKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLBitwiseNotKernel.h b/src/core/CL/kernels/CLBitwiseNotKernel.h new file mode 100644 index 0000000000..bf68bc7ae5 --- /dev/null +++ b/src/core/CL/kernels/CLBitwiseNotKernel.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLBITWISENOTKERNEL_H +#define ARM_COMPUTE_CLBITWISENOTKERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the bitwise NOT operation kernel. + * + * Result is computed by: + * @f[ output(x,y) = \lnot input(x,y) @f] + */ +class CLBitwiseNotKernel : public ICLSimple2DKernel +{ +public: + /** Set the inputs and output images. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Set the inputs and output images. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLBITWISENOTKERNEL_H */ diff --git a/src/core/CL/kernels/CLBitwiseOrKernel.cpp b/src/core/CL/kernels/CLBitwiseOrKernel.cpp index 0e2e5d4f3c..8954d9aa6d 100644 --- a/src/core/CL/kernels/CLBitwiseOrKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseOrKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLBitwiseOrKernel.h" +#include "src/core/CL/kernels/CLBitwiseOrKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLBitwiseOrKernel.h b/src/core/CL/kernels/CLBitwiseOrKernel.h new file mode 100644 index 0000000000..c27d0c27e2 --- /dev/null +++ b/src/core/CL/kernels/CLBitwiseOrKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLBITWISEORKERNEL_H +#define ARM_COMPUTE_CLBITWISEORKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the bitwise OR operation kernel. 
+ * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \lor input2(x,y) @f] + */ +class CLBitwiseOrKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLBitwiseOrKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBitwiseOrKernel(const CLBitwiseOrKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBitwiseOrKernel &operator=(const CLBitwiseOrKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBitwiseOrKernel(CLBitwiseOrKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBitwiseOrKernel &operator=(CLBitwiseOrKernel &&) = default; + /** Set the inputs and output images + * + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + /** Set the inputs and output images + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLBITWISEORKERNEL_H */ diff --git a/src/core/CL/kernels/CLBitwiseXorKernel.cpp b/src/core/CL/kernels/CLBitwiseXorKernel.cpp index 65b17c02bd..69eb38e2e6 100644 --- a/src/core/CL/kernels/CLBitwiseXorKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseXorKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLBitwiseXorKernel.h" +#include "src/core/CL/kernels/CLBitwiseXorKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLBitwiseXorKernel.h b/src/core/CL/kernels/CLBitwiseXorKernel.h new file mode 100644 index 0000000000..b4861ea757 --- /dev/null +++ b/src/core/CL/kernels/CLBitwiseXorKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLBITWISEXORKERNEL_H +#define ARM_COMPUTE_CLBITWISEXORKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the bitwise XOR operation kernel. + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \oplus input2(x,y) @f] + */ +class CLBitwiseXorKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLBitwiseXorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBitwiseXorKernel(const CLBitwiseXorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBitwiseXorKernel &operator=(const CLBitwiseXorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBitwiseXorKernel(CLBitwiseXorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBitwiseXorKernel &operator=(CLBitwiseXorKernel &&) = default; + /** Set the inputs and output images + * + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + /** Set the inputs and output images + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLBITWISEXORKERNEL_H */ diff --git a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp index b8c0d2f2b8..bcfd9b8e5a 100644 --- a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp +++ b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h" +#include "src/core/CL/kernels/CLBoundingBoxTransformKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLBoundingBoxTransformKernel.h b/src/core/CL/kernels/CLBoundingBoxTransformKernel.h new file mode 100644 index 0000000000..08f350e86a --- /dev/null +++ b/src/core/CL/kernels/CLBoundingBoxTransformKernel.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
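The four bitwise kernels above (AND, NOT, OR, XOR) all follow the same configure-and-enqueue pattern on U8 tensors; only the number of inputs differs, with NOT taking a single source. A minimal sketch for the AND case follows; the 64x64 image size and the run_bitwise_and_example helper are assumptions.

// Illustrative only: element-wise AND of two U8 tensors. The shape is assumed.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLBitwiseAndKernel.h"

using namespace arm_compute;

void run_bitwise_and_example()
{
    CLScheduler::get().default_init();

    CLTensor in1, in2, out;
    const TensorInfo info(TensorShape(64U, 64U), 1, DataType::U8);
    in1.allocator()->init(info);
    in2.allocator()->init(info);
    out.allocator()->init(info);

    CLBitwiseAndKernel band;
    band.configure(&in1, &in2, &out);

    in1.allocator()->allocate();
    in2.allocator()->allocate();
    out.allocator()->allocate();

    CLScheduler::get().enqueue(band);
    CLScheduler::get().sync();
}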
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H +#define ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the bounding box kernel */ +class CLBoundingBoxTransformKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLBoundingBoxTransformKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBoundingBoxTransformKernel(const CLBoundingBoxTransformKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBoundingBoxTransformKernel &operator=(const CLBoundingBoxTransformKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBoundingBoxTransformKernel(CLBoundingBoxTransformKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBoundingBoxTransformKernel &operator=(CLBoundingBoxTransformKernel &&) = default; + /** Default destructor */ + ~CLBoundingBoxTransformKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. + * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input + * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. + * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input + * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. + * + * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. + * + */ + void configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. + * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. 
Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input + * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. + * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input + * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. + * + * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. + * + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info); + + /** Static function to check if given info will lead to a valid configuration of @ref CLBoundingBoxTransform + * + * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. + * @param[in] pred_boxes Destination tensor info. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input + * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. + * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input + * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. + * + * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. + * + * @return a Status + */ + static Status validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_boxes; + ICLTensor *_pred_boxes; + const ICLTensor *_deltas; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H */ diff --git a/src/core/CL/kernels/CLBox3x3Kernel.cpp b/src/core/CL/kernels/CLBox3x3Kernel.cpp index 2f6c09df0b..9f493b4fb8 100644 --- a/src/core/CL/kernels/CLBox3x3Kernel.cpp +++ b/src/core/CL/kernels/CLBox3x3Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLBox3x3Kernel.h" +#include "src/core/CL/kernels/CLBox3x3Kernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLBox3x3Kernel.h b/src/core/CL/kernels/CLBox3x3Kernel.h new file mode 100644 index 0000000000..2373c4a928 --- /dev/null +++ b/src/core/CL/kernels/CLBox3x3Kernel.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
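A minimal sketch of driving the bounding box transform kernel documented above, with validate() as the usual pre-flight check. The proposal count M, class count K, image size, the three-argument BoundingBoxTransformInfo construction and the run_bbox_transform_example helper are assumptions made for illustration.

// Illustrative only: transform M box proposals with per-class deltas.
// M = 100 proposals, K = 4 classes and an 800x600 image are assumed values.
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLBoundingBoxTransformKernel.h"

using namespace arm_compute;

void run_bbox_transform_example()
{
    CLScheduler::get().default_init();

    constexpr unsigned int M = 100; // proposals
    constexpr unsigned int K = 4;   // classes

    CLTensor boxes, deltas, pred_boxes;
    boxes.allocator()->init(TensorInfo(TensorShape(4U, M), 1, DataType::F32));
    deltas.allocator()->init(TensorInfo(TensorShape(4U * K, M), 1, DataType::F32));
    pred_boxes.allocator()->init(TensorInfo(TensorShape(4U * K, M), 1, DataType::F32));

    const BoundingBoxTransformInfo bb_info(800.0f, 600.0f, 1.0f); // image width, height, scale

    // Check the configuration first, then configure and enqueue.
    const Status st = CLBoundingBoxTransformKernel::validate(boxes.info(), pred_boxes.info(), deltas.info(), bb_info);
    if(st.error_code() != ErrorCode::OK)
    {
        return;
    }

    CLBoundingBoxTransformKernel bbox;
    bbox.configure(&boxes, &pred_boxes, &deltas, bb_info);

    boxes.allocator()->allocate();
    deltas.allocator()->allocate();
    pred_boxes.allocator()->allocate();

    CLScheduler::get().enqueue(bbox);
    CLScheduler::get().sync();
}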
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLBOX3X3KERNEL_H +#define ARM_COMPUTE_CLBOX3X3KERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the box 3x3 filter kernel. + * + */ +class CLBox3x3Kernel : public ICLSimple2DKernel +{ +public: + /**Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /**Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); + + //Inherited methods overriden: + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLBOX3X3KERNEL_H */ diff --git a/src/core/CL/kernels/CLCannyEdgeKernel.cpp b/src/core/CL/kernels/CLCannyEdgeKernel.cpp index c76ec6769e..1fe944c8a2 100644 --- a/src/core/CL/kernels/CLCannyEdgeKernel.cpp +++ b/src/core/CL/kernels/CLCannyEdgeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h" +#include "src/core/CL/kernels/CLCannyEdgeKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLCannyEdgeKernel.h b/src/core/CL/kernels/CLCannyEdgeKernel.h new file mode 100644 index 0000000000..7543822d8d --- /dev/null +++ b/src/core/CL/kernels/CLCannyEdgeKernel.h @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
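The 3x3 box filter kernel above adds a border_size() query on top of the usual configure-and-enqueue pattern. A minimal sketch with an assumed 640x480 U8 image and undefined borders; run_box3x3_example is a hypothetical helper.

// Illustrative only: 3x3 box filter on a U8 image with undefined borders.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLBox3x3Kernel.h"

using namespace arm_compute;

void run_box3x3_example()
{
    CLScheduler::get().default_init();

    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));
    dst.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));

    CLBox3x3Kernel box;
    box.configure(&src, &dst, /* border_undefined */ true);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    CLScheduler::get().enqueue(box);
    CLScheduler::get().sync();
}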
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCANNYEDGEKERNEL_H +#define ARM_COMPUTE_CLCANNYEDGEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform Gradient computation. + */ +class CLGradientKernel : public ICLKernel +{ +public: + /** Constructor */ + CLGradientKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGradientKernel(const CLGradientKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGradientKernel &operator=(const CLGradientKernel &) = delete; + /** Initialise the kernel's sources, destinations and border mode. + * + * @note gx, gy and mag must all be the same size (either 16 or 32). + * + * @param[in] gx Source tensor - Gx component. Data types supported: S16/S32. + * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx. + * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy. + * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8. + * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm. + */ + void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type); + /** Initialise the kernel's sources, destinations and border mode. + * + * @note gx, gy and mag must all be the same size (either 16 or 32). + * + * @param[in] compile_context The compile context to be used. + * @param[in] gx Source tensor - Gx component. Data types supported: S16/S32. + * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx. + * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy. + * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8. + * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_gx; /**< Source tensor - Gx component */ + const ICLTensor *_gy; /**< Source tensor - Gy component */ + ICLTensor *_magnitude; /**< Destination tensor - Magnitude */ + ICLTensor *_phase; /**< Destination tensor - Quantized phase */ +}; + +/** OpenCL kernel to perform Non-Maxima suppression for Canny Edge. + * + * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input + * to characterize points as possible edges. The output buffer needs to be cleared before this kernel is executed. + * + * @note Hysteresis is computed in @ref CLEdgeTraceKernel + */ +class CLEdgeNonMaxSuppressionKernel : public ICLKernel +{ +public: + /** Constructor */ + CLEdgeNonMaxSuppressionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLEdgeNonMaxSuppressionKernel(const CLEdgeNonMaxSuppressionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLEdgeNonMaxSuppressionKernel &operator=(const CLEdgeNonMaxSuppressionKernel &) = delete; + /** Initialise the kernel's sources, destination and border mode. + * + * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32. + * @param[in] phase Source tensor - Quantized phase. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U16/U32. + * @param[in] lower_thr Lower threshold. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined); + /** Initialise the kernel's sources, destination and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32. + * @param[in] phase Source tensor - Quantized phase. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U16/U32. + * @param[in] lower_thr Lower threshold. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_magnitude; /**< Source tensor - Magnitude. */ + const ICLTensor *_phase; /**< Source tensor - Quantized phase. */ + ICLTensor *_output; /**< Destination tensor. */ +}; + +/** OpenCL kernel to perform Edge tracing. 
+ */ +class CLEdgeTraceKernel : public ICLKernel +{ +public: + /** Constructor */ + CLEdgeTraceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLEdgeTraceKernel(const CLEdgeTraceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLEdgeTraceKernel &operator=(const CLEdgeTraceKernel &) = delete; + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data types supported: U16/U32. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] upper_thr Upper threshold used for the hysteresis + * @param[in] lower_thr Lower threshold used for the hysteresis + * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32. + * Expected to be initialized to 0 before each run. + * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32 + * Expected to be initialized to 0 before each run. + * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32. + * Expected to be initialized to 0 before each run. + * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8. + * Expected to be initialized to 0 before each run. + */ + void configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, + ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter); + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U16/U32. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] upper_thr Upper threshold used for the hysteresis + * @param[in] lower_thr Lower threshold used for the hysteresis + * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32. + * Expected to be initialized to 0 before each run. + * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32 + * Expected to be initialized to 0 before each run. + * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32. + * Expected to be initialized to 0 before each run. + * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8. + * Expected to be initialized to 0 before each run. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, + ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Source tensor. */ + ICLTensor *_output; /**< Destination tensor. */ + int32_t _lower_thr; /**< Lower threshold used for the hysteresis. */ + int32_t _upper_thr; /**< Upper threshold used for the hysteresis. 
*/ + ICLTensor *_visited; /**< Marks visited elements */ + ICLTensor *_recorded; /**< Marks recorded elements */ + ICLTensor *_l1_stack; /**< L1 hysteris stack */ + ICLTensor *_l1_stack_counter; /**< L1 hysteris stack counter */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLCANNYEDGEKERNEL_H */ diff --git a/src/core/CL/kernels/CLChannelCombineKernel.cpp b/src/core/CL/kernels/CLChannelCombineKernel.cpp index d574f352ae..52ba9dd065 100644 --- a/src/core/CL/kernels/CLChannelCombineKernel.cpp +++ b/src/core/CL/kernels/CLChannelCombineKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLChannelCombineKernel.h" +#include "src/core/CL/kernels/CLChannelCombineKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLMultiImage.h" diff --git a/src/core/CL/kernels/CLChannelCombineKernel.h b/src/core/CL/kernels/CLChannelCombineKernel.h new file mode 100644 index 0000000000..f19995aa8e --- /dev/null +++ b/src/core/CL/kernels/CLChannelCombineKernel.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H +#define ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include +#include + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the channel combine kernel */ +class CLChannelCombineKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLChannelCombineKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLChannelCombineKernel(const CLChannelCombineKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLChannelCombineKernel &operator=(const CLChannelCombineKernel &) = delete; + /** Allow instances of this class to be moved */ + CLChannelCombineKernel(CLChannelCombineKernel &&) = default; + /** Allow instances of this class to be moved */ + CLChannelCombineKernel &operator=(CLChannelCombineKernel &&) = default; + /** Default destructor */ + ~CLChannelCombineKernel() = default; + /** Configure function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. 
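The three Canny kernels documented above are meant to be chained: gradient computation, then non-maxima suppression, then edge tracing with hysteresis. The sketch below only shows how their configure calls and enqueue order fit together; the thresholds, the L2 norm choice and the run_canny_stages helper are assumptions, and the caller is expected to have allocated (and zero-initialised, where the comments above require it) every tensor. In the library itself this wiring is done by the CLCannyEdge function.

// Illustrative only: wiring of the Canny edge kernel stages.
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLCannyEdgeKernel.h"

using namespace arm_compute;

void run_canny_stages(CLTensor &gx, CLTensor &gy, CLTensor &mag, CLTensor &phase,
                      CLTensor &nonmax, CLTensor &edges,
                      CLTensor &visited, CLTensor &recorded,
                      CLTensor &l1_stack, CLTensor &l1_stack_counter)
{
    constexpr int32_t lower_thr = 50;  // assumed hysteresis thresholds
    constexpr int32_t upper_thr = 100;

    CLGradientKernel              gradient;
    CLEdgeNonMaxSuppressionKernel non_max;
    CLEdgeTraceKernel             trace;

    gradient.configure(&gx, &gy, &mag, &phase, /* norm_type: not 1, so L2 */ 2);
    // nonmax must be cleared before execution (see the class note above).
    non_max.configure(&mag, &phase, &nonmax, lower_thr, /* border_undefined */ false);
    // visited/recorded/l1_stack/l1_stack_counter must be zero-initialised before each run.
    trace.configure(&nonmax, &edges, upper_thr, lower_thr,
                    &visited, &recorded, &l1_stack, &l1_stack_counter);

    CLScheduler::get().enqueue(gradient, false);
    CLScheduler::get().enqueue(non_max, false);
    CLScheduler::get().enqueue(trace);
    CLScheduler::get().sync();
}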
Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format. + * @param[out] output The single planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422. + */ + void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output); + /** Configure function's inputs and outputs. + * + * @param[in] compile_context The compile context to be used. + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format. + * @param[out] output The single planar output tensor. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output); + /** Configure function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[out] output The multi planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422. + */ + void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output); + /** Configure function's inputs and outputs. + * + * @param[in] compile_context The compile context to be used. + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[out] output The multi planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422. + */ + void configure(const CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + std::array _planes; + ICLTensor *_output; + ICLMultiImage *_output_multi; + std::array _x_subsampling; + std::array _y_subsampling; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H */ diff --git a/src/core/CL/kernels/CLChannelExtractKernel.cpp b/src/core/CL/kernels/CLChannelExtractKernel.cpp index 7911b948ae..cbf504b98b 100644 --- a/src/core/CL/kernels/CLChannelExtractKernel.cpp +++ b/src/core/CL/kernels/CLChannelExtractKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLChannelExtractKernel.h" +#include "src/core/CL/kernels/CLChannelExtractKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLMultiImage.h" diff --git a/src/core/CL/kernels/CLChannelExtractKernel.h b/src/core/CL/kernels/CLChannelExtractKernel.h new file mode 100644 index 0000000000..37abde548c --- /dev/null +++ b/src/core/CL/kernels/CLChannelExtractKernel.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H +#define ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +#include + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the channel extract kernel */ +class CLChannelExtractKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLChannelExtractKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLChannelExtractKernel(const CLChannelExtractKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLChannelExtractKernel &operator=(const CLChannelExtractKernel &) = delete; + /** Allow instances of this class to be moved */ + CLChannelExtractKernel(CLChannelExtractKernel &&) = default; + /** Allow instances of this class to be moved */ + CLChannelExtractKernel &operator=(CLChannelExtractKernel &&) = default; + /** Default destructor */ + ~CLChannelExtractKernel() = default; + /** Set the input and output of the kernel + * + * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 + * @param[in] channel Channel to extract. + * @param[out] output Destination tensor. Must be of U8 format. + */ + void configure(const ICLTensor *input, Channel channel, ICLTensor *output); + /** Set the input and output of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 + * @param[in] channel Channel to extract. + * @param[out] output Destination tensor. Must be of U8 format. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, Channel channel, ICLTensor *output); + /** Set the input and output of the kernel + * + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444 + * @param[in] channel Channel to extract. + * @param[out] output Single-planar 2D destination image. Must be of U8 format. + */ + void configure(const ICLMultiImage *input, Channel channel, ICLImage *output); + /** Set the input and output of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Multi-planar source image. 
Formats supported: NV12/NV21/IYUV/YUV444 + * @param[in] channel Channel to extract. + * @param[out] output Single-planar 2D destination image. Must be of U8 format. + */ + void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, Channel channel, ICLImage *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + uint32_t _num_elems_processed_per_iteration; + uint32_t _subsampling; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H */ diff --git a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp index 301a762850..c969792c3e 100644 --- a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp +++ b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h" +#include "src/core/CL/kernels/CLChannelShuffleLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLChannelShuffleLayerKernel.h b/src/core/CL/kernels/CLChannelShuffleLayerKernel.h new file mode 100644 index 0000000000..31c007f17e --- /dev/null +++ b/src/core/CL/kernels/CLChannelShuffleLayerKernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
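A minimal sketch for the channel extract kernel documented above, pulling the R plane out of an RGB888 image; the 320x240 size and the run_channel_extract_example helper are assumptions. The channel combine kernel is driven the same way in the opposite direction.

// Illustrative only: extract the R channel of an RGB888 image into a U8 plane.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLChannelExtractKernel.h"

using namespace arm_compute;

void run_channel_extract_example()
{
    CLScheduler::get().default_init();

    CLTensor rgb, r_plane;
    rgb.allocator()->init(TensorInfo(TensorShape(320U, 240U), Format::RGB888));
    r_plane.allocator()->init(TensorInfo(TensorShape(320U, 240U), Format::U8));

    CLChannelExtractKernel extract;
    extract.configure(&rgb, Channel::R, &r_plane);

    rgb.allocator()->allocate();
    r_plane.allocator()->allocate();

    CLScheduler::get().enqueue(extract);
    CLScheduler::get().sync();
}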
+ */ +#ifndef ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H +#define ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the channel shuffle kernel */ +class CLChannelShuffleLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLChannelShuffleLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLChannelShuffleLayerKernel(const CLChannelShuffleLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLChannelShuffleLayerKernel &operator=(const CLChannelShuffleLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLChannelShuffleLayerKernel(CLChannelShuffleLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLChannelShuffleLayerKernel &operator=(CLChannelShuffleLayerKernel &&) = default; + /** Default destructor */ + ~CLChannelShuffleLayerKernel() = default; + /** Configure function's inputs and outputs. + * + * @param[in] input Input tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: Same as @p input + * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. + */ + void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_groups); + /** Configure function's inputs and outputs. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: Same as @p input + * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int num_groups); + /** Static function to check if given info will lead to a valid configuration of @ref CLChannelShuffleLayerKernel + * + * @param[in] input Input tensor info. Data types supported: All. + * @param[in] output Output tensor info. Data type supported: Same as @p input + * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp index 3dc007d9e0..44b8471725 100644 --- a/src/core/CL/kernels/CLCol2ImKernel.cpp +++ b/src/core/CL/kernels/CLCol2ImKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
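For the channel shuffle kernel documented above, validate() enforces the num_groups constraints spelled out in the comments (greater than 1 and dividing the channel count). A minimal sketch with assumed shapes and a hypothetical run_channel_shuffle_example helper:

// Illustrative only: shuffle 8 channels in 2 groups. Shapes are assumed values.
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLChannelShuffleLayerKernel.h"

using namespace arm_compute;

void run_channel_shuffle_example()
{
    CLScheduler::get().default_init();

    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(28U, 28U, 8U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(28U, 28U, 8U), 1, DataType::F32));

    // num_groups must be > 1 and divide the channel count (8 % 2 == 0 here).
    constexpr unsigned int num_groups = 2;
    if(CLChannelShuffleLayerKernel::validate(src.info(), dst.info(), num_groups).error_code() != ErrorCode::OK)
    {
        return;
    }

    CLChannelShuffleLayerKernel shuffle;
    shuffle.configure(&src, &dst, num_groups);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    CLScheduler::get().enqueue(shuffle);
    CLScheduler::get().sync();
}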
*/ -#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h" +#include "src/core/CL/kernels/CLCol2ImKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLCol2ImKernel.h b/src/core/CL/kernels/CLCol2ImKernel.h new file mode 100644 index 0000000000..710e048bca --- /dev/null +++ b/src/core/CL/kernels/CLCol2ImKernel.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCOL2IMKERNEL_H +#define ARM_COMPUTE_CLCOL2IMKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the col2im reshaping kernel. + * + * Rearranges each matrix column into image blocks. It's the inverse operation of @ref CLIm2ColKernel. + * + * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3: + * + * @f[ + * \left( \begin{array}{ccccccccc} + * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccc} + * a0 & a1 & a2 \\ + * a3 & a4 & a5 \\ + * a6 & a7 & a8 \\ + * \end{array} \right) + * @f] + */ +class CLCol2ImKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLCol2ImKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLCol2ImKernel(const CLCol2ImKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLCol2ImKernel &operator=(const CLCol2ImKernel &) = delete; + /** Allow instances of this class to be moved */ + CLCol2ImKernel(CLCol2ImKernel &&) = default; + /** Allow instances of this class to be moved */ + CLCol2ImKernel &operator=(CLCol2ImKernel &&) = default; + /** Default destructor */ + ~CLCol2ImKernel() = default; + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], + * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW + * @param[in] convolved_dims Output convolved dimensions. 
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution + */ + void configure(const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups = 1); + /** Set the input and output of the kernel. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], + * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW + * @param[in] convolved_dims Output convolved dimensions. + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups = 1); + /** Static function to check if given info will lead to a valid configuration of @ref CLCol2ImKernel + * + * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], + * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW + * @param[in] convolved_dims Output convolved dimensions. + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims, unsigned int num_groups = 1); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +public: + const ICLTensor *_input; + ICLTensor *_output; + Size2D _convolved_dims; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLCOL2IMKERNEL_H */ diff --git a/src/core/CL/kernels/CLColorConvertKernel.cpp b/src/core/CL/kernels/CLColorConvertKernel.cpp index 0f82d87348..6c61fec997 100644 --- a/src/core/CL/kernels/CLColorConvertKernel.cpp +++ b/src/core/CL/kernels/CLColorConvertKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h" +#include "src/core/CL/kernels/CLColorConvertKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLMultiImage.h" diff --git a/src/core/CL/kernels/CLColorConvertKernel.h b/src/core/CL/kernels/CLColorConvertKernel.h new file mode 100644 index 0000000000..0f082914cd --- /dev/null +++ b/src/core/CL/kernels/CLColorConvertKernel.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
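A short sketch of how the col2im kernel above is typically wired up inside a GEMM-based convolution. The 3x3 convolved_dims value mirrors the 9-element example in the class comment; the tensors are assumed to have been shaped (and padded) by the surrounding layer.

#include "arm_compute/core/Size2D.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/core/CL/kernels/CLCol2ImKernel.h"

using namespace arm_compute;

// gemm_output holds one column per convolution position; dst is the NCHW output tensor.
void col2im_example(ICLTensor *gemm_output, ICLTensor *dst)
{
    CLCol2ImKernel col2im;
    col2im.configure(gemm_output, dst, Size2D(3U, 3U)); // convolved output is 3x3, as in the comment above
    CLScheduler::get().enqueue(col2im);
}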
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCOLORCONVERTKERNEL_H +#define ARM_COMPUTE_CLCOLORCONVERTKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the color convert kernel. + * + */ +class CLColorConvertKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLColorConvertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLColorConvertKernel(const CLColorConvertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLColorConvertKernel &operator=(const CLColorConvertKernel &) = delete; + /** Allow instances of this class to be moved */ + CLColorConvertKernel(CLColorConvertKernel &&) = default; + /** Allow instances of this class to be moved */ + CLColorConvertKernel &operator=(CLColorConvertKernel &&) = default; + /** Default destructor. */ + ~CLColorConvertKernel() = default; + + /** Set the input and output of the kernel + * + * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 + * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), + * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/), + * U8 (if the formats of @p input is RGB888) + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Set the input and output of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 + * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), + * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/), + * U8 (if the formats of @p input is RGB888) + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); + /** Set the input and output of the kernel + * + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV + * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888 + */ + void configure(const ICLMultiImage *input, ICLImage *output); + /** Set the input and output of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV + * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888 + */ + void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLImage *output); + /** Set the input and output of the kernel + * + * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 + * @param[out] output Multi-planar destination image. 
Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888) + */ + void configure(const ICLImage *input, ICLMultiImage *output); + /** Set the input and output of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 + * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888) + */ + void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLMultiImage *output); + /** Set the input and output of the kernel + * + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV + * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) + */ + void configure(const ICLMultiImage *input, ICLMultiImage *output); + /** Set the input and output of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV + * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) + */ + void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLMultiImage *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /*pointer to single planar tensor input */ + ICLTensor *_output; /*pointer to single planar tensor output */ + const ICLMultiImage *_multi_input; /*pointer to multi-planar input */ + ICLMultiImage *_multi_output; /*pointer to multi-planar output */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLCOLORCONVERTKERNEL_H */ diff --git a/src/core/CL/kernels/CLComparisonKernel.cpp b/src/core/CL/kernels/CLComparisonKernel.cpp index 2b72946f49..e2aee36bd8 100644 --- a/src/core/CL/kernels/CLComparisonKernel.cpp +++ b/src/core/CL/kernels/CLComparisonKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLComparisonKernel.h" +#include "src/core/CL/kernels/CLComparisonKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLComparisonKernel.h b/src/core/CL/kernels/CLComparisonKernel.h new file mode 100644 index 0000000000..0b94190183 --- /dev/null +++ b/src/core/CL/kernels/CLComparisonKernel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
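A rough usage sketch for the colour convert kernel above, using the single-planar tensor overload. The 640x480 frame size is made up; user code would normally go through the CLColorConvert function.

#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLColorConvertKernel.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    // RGBA8888 -> RGB888, one of the single-planar combinations listed in configure().
    CLTensor rgba, rgb;
    rgba.allocator()->init(TensorInfo(TensorShape(640U, 480U), Format::RGBA8888));
    rgb.allocator()->init(TensorInfo(TensorShape(640U, 480U), Format::RGB888));

    CLColorConvertKernel convert;
    convert.configure(&rgba, &rgb);

    rgba.allocator()->allocate();
    rgb.allocator()->allocate();

    CLScheduler::get().enqueue(convert);
    CLScheduler::get().sync();
    return 0;
}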
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCOMPARISONKERNEL_H +#define ARM_COMPUTE_CLCOMPARISONKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ICLTensor; + +/** Interface for the comparison kernel. */ +class CLComparisonKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLComparisonKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLComparisonKernel(const CLComparisonKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLComparisonKernel &operator=(const CLComparisonKernel &) = delete; + /** Allow instances of this class to be moved */ + CLComparisonKernel(CLComparisonKernel &&) = default; + /** Allow instances of this class to be moved */ + CLComparisonKernel &operator=(CLComparisonKernel &&) = default; + /** Default destructor */ + ~CLComparisonKernel() = default; + /** Set the inputs and output tensors + * + * @param[in] input1 Source tensor. Data types supported: All. + * @param[in] input2 Source tensor. Data types supported: Same as @p input1. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] operation Comparison operation to use. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation); + /** Set the inputs and output tensors + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Source tensor. Data types supported: All. + * @param[in] input2 Source tensor. Data types supported: Same as @p input1. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] operation Comparison operation to use. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation); + /** Static function to check if given info will lead to a valid configuration of @ref CLComparisonKernel + * + * @param[in] input1 Source tensor. Data types supported: All. + * @param[in] input2 Source tensor. Data types supported: Same as @p input1. + * @param[in] output Destination tensor. Data types supported: U8. + * @param[in] operation Comparison operation to use. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation operation); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLCOMPARISONKERNEL_H */ diff --git a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp index c7888c9c76..dcf4e6662e 100644 --- a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp +++ b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h" +#include "src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h new file mode 100644 index 0000000000..d1da793df2 --- /dev/null +++ b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H +#define ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface to convert the 2D Fully Connected weights from NCHW to NHWC or vice versa. + * + * @note This function can be applied to the 2D weights used by a Fully Connected layer if: + * - It follows a Convolution layer + * - The data layout used by the network does not match the one the model has been trained in. 
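A hedged sketch of the comparison kernel defined above, including the static validate() check. The shapes and the Greater operation are chosen arbitrarily for illustration.

#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLComparisonKernel.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    CLTensor lhs, rhs, mask;
    lhs.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::F32));
    rhs.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::F32));
    mask.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8)); // comparison output is always U8

    if(bool(CLComparisonKernel::validate(lhs.info(), rhs.info(), mask.info(), ComparisonOperation::Greater)))
    {
        CLComparisonKernel cmp;
        cmp.configure(&lhs, &rhs, &mask, ComparisonOperation::Greater);

        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        mask.allocator()->allocate();

        CLScheduler::get().enqueue(cmp);
        CLScheduler::get().sync();
    }
    return 0;
}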
+ * + * @note This function assumes the weights are already reshaped (transposed) + */ +class CLConvertFullyConnectedWeightsKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLConvertFullyConnectedWeightsKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLConvertFullyConnectedWeightsKernel(const CLConvertFullyConnectedWeightsKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLConvertFullyConnectedWeightsKernel &operator=(const CLConvertFullyConnectedWeightsKernel &) = delete; + /** Allow instances of this class to be moved */ + CLConvertFullyConnectedWeightsKernel(CLConvertFullyConnectedWeightsKernel &&) = default; + /** Allow instances of this class to be moved */ + CLConvertFullyConnectedWeightsKernel &operator=(CLConvertFullyConnectedWeightsKernel &&) = default; + /** Default destructor */ + ~CLConvertFullyConnectedWeightsKernel() = default; + /** Set the input and output tensor. + * + * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. + * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. + * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). + * @param[in] data_layout The data layout the weights have been trained in. + */ + void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout); + /** Set the input and output tensor. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. + * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. + * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). + * @param[in] data_layout The data layout the weights have been trained in. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout); + /** Static function to check if given info will lead to a valid configuration of @ref CLConvertFullyConnectedWeightsKernel + * + * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All. + * @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input. + * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). + * @param[in] data_layout The data layout the weights have been trained in. + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H */ diff --git a/src/core/CL/kernels/CLConvolutionKernel.cpp b/src/core/CL/kernels/CLConvolutionKernel.cpp index 48b185f78d..21f1047cc6 100644 --- a/src/core/CL/kernels/CLConvolutionKernel.cpp +++ b/src/core/CL/kernels/CLConvolutionKernel.cpp @@ -21,11 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
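To make the NCHW/NHWC weight re-ordering above concrete, a small hypothetical example: an FC layer fed by a 7x7x64 feature map whose 2D weights were trained with an NCHW model but will be executed in NHWC. All shapes are illustrative only.

#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    // 2D weights: 7*7*64 = 3136 inputs, 1000 outputs (hypothetical classifier head).
    CLTensor weights, converted;
    weights.allocator()->init(TensorInfo(TensorShape(3136U, 1000U), 1, DataType::F32));
    converted.allocator()->init(TensorInfo(TensorShape(3136U, 1000U), 1, DataType::F32));

    CLConvertFullyConnectedWeightsKernel convert;
    // original_input_shape is the 7x7x64 tensor entering the FC layer; the weights were trained in NCHW.
    convert.configure(&weights, &converted, TensorShape(7U, 7U, 64U), DataLayout::NCHW);

    weights.allocator()->allocate();
    converted.allocator()->allocate();

    CLScheduler::get().enqueue(convert);
    CLScheduler::get().sync();
    return 0;
}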
*/ -#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h" +#include "src/core/CL/kernels/CLConvolutionKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Error.h" @@ -33,6 +32,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" +#include "src/core/CL/ICLKernel.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLConvolutionKernel.h b/src/core/CL/kernels/CLConvolutionKernel.h new file mode 100644 index 0000000000..33e73caf11 --- /dev/null +++ b/src/core/CL/kernels/CLConvolutionKernel.h @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCONVOLUTIONKERNEL_H +#define ARM_COMPUTE_CLCONVOLUTIONKERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/****************************************************************************************\ + * Square Convolution * +\****************************************************************************************/ + +/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9). + * The client can supply a convolution matrix \f$ C_{m,n} \f$. + * @f{eqnarray}{ + * k_0 &=& \frac{m}{2} \\ + * l_0 &=& \frac{n}{2} \\ + * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l} + * @f} + * + * @note The above equation for this function is similar to the default OpenCV Filter2D function, + * which actually computes a correlation and not a convolution. + * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically. + */ +template +class CLConvolutionKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] scale Scale of the convolution matrix. 
If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; + +/** Interface for the kernel which applies a 3x3 convolution to a tensor. */ +using CLConvolution3x3Kernel = CLConvolutionKernel<3>; +/** Interface for the kernel which applies a 5x5 convolution to a tensor. */ +using CLConvolution5x5Kernel = CLConvolutionKernel<5>; +/** Interface for the kernel which applies a 7x7 convolution to a tensor. */ +using CLConvolution7x7Kernel = CLConvolutionKernel<7>; +/** Interface for the kernel which applies a 9x9 convolution to a tensor. */ +using CLConvolution9x9Kernel = CLConvolutionKernel<9>; + +/****************************************************************************************\ + * Separable Square Convolution * +\****************************************************************************************/ + +/** Kernel for the Horizontal pass of a Separable Convolution. Currently support 5x5, 7x7, 9x9 */ +template +class CLSeparableConvolutionHorKernel : public ICLSimple2DKernel +{ +public: + /** Default Constructor */ + CLSeparableConvolutionHorKernel(); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U16/S16/S32. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; + +private: + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel which applies a horizontal pass of 5x5 convolution to a tensor. 
*/ +using CLSeparableConvolution5x5HorKernel = CLSeparableConvolutionHorKernel<5>; +/** Interface for the kernel which applies a horizontal pass of 7x7 convolution to a tensor. */ +using CLSeparableConvolution7x7HorKernel = CLSeparableConvolutionHorKernel<7>; +/** Interface for the kernel which applies a horizontal pass of 9x9 convolution to a tensor. */ +using CLSeparableConvolution9x9HorKernel = CLSeparableConvolutionHorKernel<9>; + +/** Kernel for the Vertical pass of a Separable Convolution. Currently supports 5x5, 7x7, 9x9 */ +template +class CLSeparableConvolutionVertKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data types supported: U16/S16/S32. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] scale Scale of the convolution matrix. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + * @param[in] data_type Data type to use for intermeidate result. @sa data_type_for_convolution + */ + void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U16/S16/S32. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] scale Scale of the convolution matrix. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + * @param[in] data_type Data type to use for intermeidate result. @sa data_type_for_convolution + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; + +/** Interface for the kernel which applies a vertical pass of 5x5 convolution to a tensor. */ +using CLSeparableConvolution5x5VertKernel = CLSeparableConvolutionVertKernel<5>; +/** Interface for the kernel which applies a vertical pass of 7x7 convolution to a tensor. */ +using CLSeparableConvolution7x7VertKernel = CLSeparableConvolutionVertKernel<7>; +/** Interface for the kernel which applies a vertical pass of 9x9 convolution to a tensor. */ +using CLSeparableConvolution9x9VertKernel = CLSeparableConvolutionVertKernel<9>; + +/****************************************************************************************\ + * Rectangle Convolution * +\****************************************************************************************/ + +/** Kernel for the running convolution on a rectangle matrix. + * + * @note Supports combinations of 3,5,7 and 9. 
+ */ +class CLConvolutionRectangleKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLConvolutionRectangleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLConvolutionRectangleKernel(const CLConvolutionRectangleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLConvolutionRectangleKernel &operator=(const CLConvolutionRectangleKernel &) = delete; + /** Allow instances of this class to be moved */ + CLConvolutionRectangleKernel(CLConvolutionRectangleKernel &&) = default; + /** Allow instances of this class to be moved */ + CLConvolutionRectangleKernel &operator=(CLConvolutionRectangleKernel &&) = default; + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] width Width of convolution matrix (Number of columns) + * @param[in] height Height of convolution matrix (Number of rows) + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] width Width of convolution matrix (Number of columns) + * @param[in] height Height of convolution matrix (Number of rows) + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + BorderSize _border_size; + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLCONVOLUTIONKERNEL_H */ diff --git a/src/core/CL/kernels/CLCopyKernel.cpp b/src/core/CL/kernels/CLCopyKernel.cpp index 184b80caa8..ca38b65df4 100644 --- a/src/core/CL/kernels/CLCopyKernel.cpp +++ b/src/core/CL/kernels/CLCopyKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
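A sketch of driving the square convolution kernel declared earlier in this header, using a 3x3 smoothing matrix and scale 0 so the coefficient sum (16) is used as the divisor. Border filling, normally done by a separate fill-border kernel before this one runs, is omitted for brevity, and the image size is made up.

#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLConvolutionKernel.h"

#include <cstdint>

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    // Illustrative 3x3 smoothing matrix; coefficients sum to 16.
    const int16_t conv3x3[9] =
    {
        1, 2, 1,
        2, 4, 2,
        1, 2, 1
    };

    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));
    dst.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));

    CLConvolution3x3Kernel conv;
    conv.configure(&src, &dst, conv3x3, 0 /* scale: 0 -> use coefficient sum */, false /* border_undefined */);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    CLScheduler::get().enqueue(conv);
    CLScheduler::get().sync();
    return 0;
}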
*/ -#include "arm_compute/core/CL/kernels/CLCopyKernel.h" +#include "src/core/CL/kernels/CLCopyKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLCopyKernel.h b/src/core/CL/kernels/CLCopyKernel.h new file mode 100644 index 0000000000..9a20b88884 --- /dev/null +++ b/src/core/CL/kernels/CLCopyKernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCOPYKERNEL_H +#define ARM_COMPUTE_CLCOPYKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform a copy between two tensors */ +class CLCopyKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLCopyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLCopyKernel(const CLCopyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLCopyKernel &operator=(const CLCopyKernel &) = delete; + /** Allow instances of this class to be moved */ + CLCopyKernel(CLCopyKernel &&) = default; + /** Allow instances of this class to be moved */ + CLCopyKernel &operator=(CLCopyKernel &&) = default; + /** Initialize the kernel's input, output. + * + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. + */ + void configure(const ICLTensor *input, ICLTensor *output, Window *output_window = nullptr); + /** Initialize the kernel's input, output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, Window *output_window = nullptr); + /** Static function to check if given info will lead to a valid configuration of @ref CLCopyKernel + * + * @param[in] input Source tensor info. Data types supported: All. 
+ * @param[in] output Destination tensor info. Data types supported: same as @p input. + * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, Window *output_window = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + Window _output_window; + bool _has_output_window; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLCOPYKERNEL_H */ diff --git a/src/core/CL/kernels/CLCropKernel.cpp b/src/core/CL/kernels/CLCropKernel.cpp index 2c99d46929..9cf15ff93b 100644 --- a/src/core/CL/kernels/CLCropKernel.cpp +++ b/src/core/CL/kernels/CLCropKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLCropKernel.h" +#include "src/core/CL/kernels/CLCropKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLCropKernel.h b/src/core/CL/kernels/CLCropKernel.h new file mode 100644 index 0000000000..cbfada58ab --- /dev/null +++ b/src/core/CL/kernels/CLCropKernel.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCROPKERNEL_H +#define ARM_COMPUTE_CLCROPKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform a copy between two tensors */ +class CLCropKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLCropKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLCropKernel(const CLCropKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLCropKernel &operator=(const CLCropKernel &) = delete; + /** Allow instances of this class to be moved */ + CLCropKernel(CLCropKernel &&) = default; + /** Allow instances of this class to be moved */ + CLCropKernel &operator=(CLCropKernel &&) = default; + /** Configure kernel + * + * @note Supported tensor rank: up to 4 + * + * @param[in] input Source tensor. Data type supported: All. 
Data layouts supported: NHWC. + * @param[out] output Destination tensor. Data type supported: F32 + * @param[in] start Coordinates of where to start cropping the image. + * @param[in] end Coordinates of where to end cropping the image. + * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input. + * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. + * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr. + */ + void configure(const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, Window *output_window = nullptr); + /** Configure kernel + * + * @note Supported tensor rank: up to 4 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data type supported: All. Data layouts supported: NHWC. + * @param[out] output Destination tensor. Data type supported: F32 + * @param[in] start Coordinates of where to start cropping the image. + * @param[in] end Coordinates of where to end cropping the image. + * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input. + * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. + * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, + Window *output_window = nullptr); + + /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel + * + * @note Supported tensor rank: up to 4 + * + * @param[in] input Source tensor info. Data type supported: All. Data layouts supported: NHWC. + * @param[in] output Destination tensor info. Data type supported: F32 + * @param[in] start Coordinates of where to start cropping the image. + * @param[in] end Coordinates of where to end cropping the image. + * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input. + * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. + * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr. + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, + Window *output_window = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + Coordinates2D _start; + uint32_t _batch_index; + float _extrapolation_value; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLCROPKERNEL_H */ diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp index 9ba3dc3d8f..d01a00d61e 100644 --- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp +++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
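A minimal sketch for the crop kernel above. The coordinates, the batch index and the assumption that both tensors were set up elsewhere (an NHWC source of any type and an F32 destination sized for the 100x100 crop) are all illustrative.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/core/CL/kernels/CLCropKernel.h"

using namespace arm_compute;

// src is an NHWC image tensor, dst an F32 tensor shaped for the 100x100 crop.
void crop_example(ICLTensor *src, ICLTensor *dst)
{
    const Coordinates2D start{ 10, 10 };
    const Coordinates2D end{ 110, 110 };

    CLCropKernel crop;
    crop.configure(src, dst, start, end, 0U /* batch_index */, 0.0f /* extrapolation_value */);
    CLScheduler::get().enqueue(crop);
}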
*/ -#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h" +#include "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h new file mode 100644 index 0000000000..e0d1322341 --- /dev/null +++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H +#define ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the Deconvolution layer kernel on OpenCL. + */ +class CLDeconvolutionLayerUpsampleKernel : public ICLKernel +{ +public: + /** Constructor */ + CLDeconvolutionLayerUpsampleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDeconvolutionLayerUpsampleKernel(const CLDeconvolutionLayerUpsampleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDeconvolutionLayerUpsampleKernel &operator=(const CLDeconvolutionLayerUpsampleKernel &) = delete; + /** Default Move Constructor. */ + CLDeconvolutionLayerUpsampleKernel(CLDeconvolutionLayerUpsampleKernel &&) = default; + /** Default move assignment operator */ + CLDeconvolutionLayerUpsampleKernel &operator=(CLDeconvolutionLayerUpsampleKernel &&) = default; + /** Default destructor */ + ~CLDeconvolutionLayerUpsampleKernel() = default; + + /** Initialise the kernel's input and output. + * + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Destination tensor. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] info Contains padding and stride information described in @ref PadStrideInfo. + */ + void configure(const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Destination tensor. 
Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] info Contains padding and stride information described in @ref PadStrideInfo. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info); + /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample + * + * @param[in] input Source tensor info. Data types supported: All. + * @param[in] output Destination tensor info. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] info Contains padding and stride information described in @ref PadStrideInfo. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PadStrideInfo &info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + PadStrideInfo _info; + DataLayout _data_layout; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H */ diff --git a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp index 1514d906dc..ea22ec0067 100644 --- a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp +++ b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h" +#include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h new file mode 100644 index 0000000000..ce354fa86f --- /dev/null +++ b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
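For the upsample kernel above, only the wiring is sketched: the source and destination are assumed to already carry the shapes computed by the surrounding deconvolution layer, and deconv_info carries its stride and padding.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"

using namespace arm_compute;

void upsample_example(ICLTensor *src, ICLTensor *dst, const PadStrideInfo &deconv_info)
{
    CLDeconvolutionLayerUpsampleKernel upsample;
    upsample.configure(src, dst, deconv_info); // scatters src with the deconvolution stride; XY plane only
    CLScheduler::get().enqueue(upsample);
}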
+ */ +#ifndef ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H +#define ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H + +#include "src/core/CL/ICLSimpleKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the OpenCL kernel to be used for reshaping the tensor before returning the result of deconvolution. + * + * The input tensor to this OpenCL kernel is expected to be the result of a @ref CLGEMM operation between the Deconvolution input and the Deconvolution filter. + * + * The input tensor should have the following shape: [filter_width * filter_height * ofms, width, height, batch_size] + * + * The output tensor should have the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size] + * + * For example, given a tensor with dimensions [4, 2, 2] this function returns a tensor with dimensions [1, 4, 4]. + * + */ +class CLDeconvolutionReshapeOutputKernel : public ICLSimpleKernel +{ +public: + /** Default constructor */ + CLDeconvolutionReshapeOutputKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDeconvolutionReshapeOutputKernel(const CLDeconvolutionReshapeOutputKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDeconvolutionReshapeOutputKernel &operator=(const CLDeconvolutionReshapeOutputKernel &) = delete; + /** Allow instances of this class to be moved */ + CLDeconvolutionReshapeOutputKernel(CLDeconvolutionReshapeOutputKernel &&) = default; + /** Allow instances of this class to be moved */ + CLDeconvolutionReshapeOutputKernel &operator=(CLDeconvolutionReshapeOutputKernel &&) = default; + /** Default destructor */ + ~CLDeconvolutionReshapeOutputKernel() = default; + + /** Initialise the kernel's source and destination. + * + * @param[in] input Input tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. + * @param[in] bias Bias tensor to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[out] output Output tensor with the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size] + * Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] input_info Deconvolution input tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] weights_info Deconvolution weights tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported. + */ + void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info); + /** Initialise the kernel's source and destination. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. + * @param[in] bias Bias tensor to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input. 
+ * @param[out] output Output tensor with the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size] + * Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] input_info Deconvolution input tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] weights_info Deconvolution weights tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, + const PadStrideInfo &deconv_info); + + /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionReshapeOutputKernel. + * + * @param[in] input GEMM output tensor info to be reshaped. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. + * @param[in] bias (Optional) Optional bias tensor info to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] output Reshaped output tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] input_info Original input tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] weights_info Original weights tensor info output. Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported. + * + * @return a Status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + bool _add_bias; + const ICLTensor *_bias; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H */ diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp index cb5d727e9b..78adfd202f 100644 --- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
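The class comment above defines the reshape purely through shape arithmetic, so its [4, 2, 2] -> [1, 4, 4] example can be reproduced in a few standalone lines. A minimal sketch, assuming the only configuration consistent with that example (2x2 filter, one output feature map, stride 2, no padding, as required by the stride == filter-size constraint):

#include <cassert>
#include <cstdio>

// Output spatial size stated in the class comment:
//   out = stride * (in - 1) + filter - 2 * pad
static int deconv_out_dim(int in, int stride, int filter, int pad)
{
    return stride * (in - 1) + filter - 2 * pad;
}

int main()
{
    // The class comment's example: a [4, 2, 2] GEMM result, i.e.
    // filter_w * filter_h * ofms = 4 over a 2x2 input, becomes a 4x4 output
    // with a single feature map ([1, 4, 4] in the comment).
    const int in_w = 2, in_h = 2, filter = 2, stride = 2, pad = 0, ofms = 1;

    const int out_w = deconv_out_dim(in_w, stride, filter, pad);
    const int out_h = deconv_out_dim(in_h, stride, filter, pad);

    assert(out_w == 4 && out_h == 4 && ofms == 1);
    std::printf("ofms=%d out_w=%d out_h=%d\n", ofms, out_w, out_h);
    return 0;
}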
*/ -#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h" +#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.h b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.h new file mode 100644 index 0000000000..6c73bd4bf4 --- /dev/null +++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H +#define ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +/** Interface for the depth concatenate kernel. + * The input tensor will be concatenated into the output tensor. + */ +class CLDepthConcatenateLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDepthConcatenateLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthConcatenateLayerKernel(const CLDepthConcatenateLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthConcatenateLayerKernel &operator=(const CLDepthConcatenateLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLDepthConcatenateLayerKernel(CLDepthConcatenateLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLDepthConcatenateLayerKernel &operator=(CLDepthConcatenateLayerKernel &&) = default; + /** Default destructor */ + ~CLDepthConcatenateLayerKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] depth_offset The offset on the Z axis. + * @param[in,out] output Output tensor. Data types supported: Same as @p input. + * + * @note: The output tensor's low two dimensions can't be smaller than the input one's. + * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. 
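A full depth concatenation is built by configuring one such kernel per input, with depth_offset set to the running sum of the depths already written. A small standalone sketch of that bookkeeping, with purely illustrative sizes:

#include <cstdio>
#include <vector>

int main()
{
    // Z-sizes (depths) of three tensors to be concatenated along the Z axis.
    const std::vector<unsigned int> input_depths = { 24, 8, 32 };

    unsigned int depth_offset = 0;
    unsigned int index        = 0;
    for(unsigned int depth : input_depths)
    {
        // One CLDepthConcatenateLayerKernel would be configured per input,
        // writing that input into the output starting at this Z offset.
        std::printf("input %u -> depth_offset %u\n", index++, depth_offset);
        depth_offset += depth;
    }
    std::printf("total output depth: %u\n", depth_offset); // 64
    return 0;
}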
+ * + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel + * + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in] depth_offset The offset on the Z axis. + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; + +private: + unsigned int _depth_offset; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H */ diff --git a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp index 452a14bd29..c98d66f390 100644 --- a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h" +#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLDepthConvertLayerKernel.h b/src/core/CL/kernels/CLDepthConvertLayerKernel.h new file mode 100644 index 0000000000..8b511c6707 --- /dev/null +++ b/src/core/CL/kernels/CLDepthConvertLayerKernel.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H +#define ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLSimple3DKernel.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the depth conversion kernel. */ +class CLDepthConvertLayerKernel : public ICLSimple3DKernel +{ +public: + /** Set the input and output of the kernel. 
+ * + * Valid conversions Input -> Output : + * + * - QSYMM8_PER_CHANNEL -> QASYMM8 (ATTENTION: it is the user's responsibility to keep track of the quantization info in the TensorInfo meta-data) + * - U8 -> S8, U16, S16, U32, S32, F16, F32 + * - U16 -> U8, S8, S16, U32, S32, F16, F32 + * - S16 -> U8, S8, U16, U32, S32, F16, F32 + * - U32 -> U8, S8, U16, S16, S32, F16, F32 + * - S32 -> U8, S8, U16, S16, U32, F16, F32 + * - F16 -> U8, S8, U16, S16, U32, F32 + * - F32 -> U8, S8, U16, S16, U32, F16 + * + * @param[in] input The input tensor to convert. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32. + * @param[out] output The output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] policy Conversion policy + * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. + */ + void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift); + /** Set the input and output of the kernel. + * + * Valid conversions Input -> Output : + * + * - QSYMM8_PER_CHANNEL -> QASYMM8 (ATTENTION: it is the user's responsibility to keep track of the quantization info in the TensorInfo meta-data) + * - U8 -> S8, U16, S16, U32, S32, F16, F32 + * - U16 -> U8, S8, S16, U32, S32, F16, F32 + * - S16 -> U8, S8, U16, U32, S32, F16, F32 + * - U32 -> U8, S8, U16, S16, S32, F16, F32 + * - S32 -> U8, S8, U16, S16, U32, F16, F32 + * - F16 -> U8, S8, U16, S16, U32, F32 + * - F32 -> U8, S8, U16, S16, U32, F16 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to convert. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32. + * @param[out] output The output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] policy Conversion policy + * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift); + /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConvertLayerKernel + * + * @param[in] input Source tensor info. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32. + * @param[in] output Destination tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] policy Conversion policy + * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift); +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H */ diff --git a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp index c3a40a286a..8946f2a713 100644 --- a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
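Because validate() works on tensor metadata only, the conversion table above can be checked without touching the GPU. A minimal sketch for one documented pair (U8 -> S16) with illustrative shapes and a zero shift; linking against the library is assumed:

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"

using namespace arm_compute;

bool u8_to_s16_is_valid()
{
    const TensorInfo src(TensorShape(16U, 16U), 1, DataType::U8);
    const TensorInfo dst(TensorShape(16U, 16U), 1, DataType::S16);

    // U8 -> S16 is in the documented table; shift must satisfy 0 <= shift < 8.
    const Status status = CLDepthConvertLayerKernel::validate(&src, &dst, ConvertPolicy::SATURATE, 0);
    return status.error_code() == ErrorCode::OK;
}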
*/ -#include "arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h" +#include "src/core/CL/kernels/CLDepthToSpaceLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.h b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.h new file mode 100644 index 0000000000..1f7f77b569 --- /dev/null +++ b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H +#define ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the depth to space kernel */ +class CLDepthToSpaceLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDepthToSpaceLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthToSpaceLayerKernel(const CLDepthToSpaceLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthToSpaceLayerKernel &operator=(const CLDepthToSpaceLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLDepthToSpaceLayerKernel(CLDepthToSpaceLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLDepthToSpaceLayerKernel &operator=(CLDepthToSpaceLayerKernel &&) = default; + /** Default destructor */ + ~CLDepthToSpaceLayerKernel() = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[out] output Tensor output. Data types supported: same as @p input + * @param[in] block_shape Block shape value. + */ + void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape); + /** Initialise the kernel's inputs and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[out] output Tensor output. Data types supported: same as @p input + * @param[in] block_shape Block shape value. 
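For orientation, assuming the standard depth-to-space definition, a block shape b maps an input of shape [W, H, C] to [W * b, H * b, C / (b * b)], with C divisible by b * b. A standalone check of that arithmetic with example sizes:

#include <cassert>

struct Shape3D { int w, h, c; };

// Standard depth-to-space shape relation for block shape `b`.
static Shape3D depth_to_space_shape(Shape3D in, int b)
{
    assert(in.c % (b * b) == 0);
    return { in.w * b, in.h * b, in.c / (b * b) };
}

int main()
{
    const Shape3D out = depth_to_space_shape({ 8, 8, 16 }, 2);
    assert(out.w == 16 && out.h == 16 && out.c == 4);
    return 0;
}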
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape); + /** Static function to check if given info will lead to a valid configuration of @ref CLDepthToSpaceLayerKernel. + * + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. + * @param[in] output Tensor output info. Data types supported: same as @p input + * @param[in] block_shape Block shape value. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Source tensor */ + ICLTensor *_output; /**< Destination tensor */ + int32_t _block_shape; /**< Block shape */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp index 7958230aac..c928677f30 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp @@ -21,11 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" @@ -34,6 +33,7 @@ #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "src/core/AccessWindowStatic.h" #include "src/core/CL/CLValidate.h" +#include "src/core/CL/ICLKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h new file mode 100644 index 0000000000..45b5869676 --- /dev/null +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H +#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H + +#include "src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor when the data layout is NCHW. + */ +class CLDepthwiseConvolutionLayer3x3NCHWKernel : public ICLDepthwiseConvolutionLayer3x3Kernel +{ +public: + /** Default constructor */ + CLDepthwiseConvolutionLayer3x3NCHWKernel(); + /** Initialize the function's source, destination, conv and border_size. + * + * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. + * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + */ + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), + const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override; + /** Initialize the function's source, destination, conv and border_size. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. + * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. Data type supported: Same as @p input. 
+ * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), + const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override; + /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NCHWKernel + * + * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor info. A 3D tensor with dimensions [3, 3, IFM]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. + * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[in] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. + * @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor info for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). 
Supported data types: S32 + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD, + const Size2D &dilation = Size2D(1U, 1U), const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr); + + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + unsigned int _conv_stride_x; + unsigned int _conv_pad_top; + unsigned int _conv_pad_left; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H */ diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp index 876ef1ec5d..0b673ccdba 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp @@ -21,11 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" @@ -34,6 +33,7 @@ #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "src/core/AccessWindowStatic.h" #include "src/core/CL/CLValidate.h" +#include "src/core/CL/ICLKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h new file mode 100644 index 0000000000..ce0bf5ceb3 --- /dev/null +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
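As with the other kernels in this patch, the static validate() above can be exercised purely on TensorInfo metadata. A minimal sketch for an F32 NCHW case with illustrative shapes (3x3 filter, stride 1, 1-pixel padding so the spatial size is preserved); the remaining parameters keep their defaults:

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"

using namespace arm_compute;

Status check_3x3_nchw()
{
    // Illustrative shapes: 16x16 input with 8 channels (NCHW), 3x3 depthwise filter.
    const TensorInfo input(TensorShape(16U, 16U, 8U), 1, DataType::F32);
    const TensorInfo weights(TensorShape(3U, 3U, 8U), 1, DataType::F32);
    const TensorInfo biases(TensorShape(8U), 1, DataType::F32);
    const TensorInfo output(TensorShape(16U, 16U, 8U), 1, DataType::F32);

    const PadStrideInfo conv_info(1, 1, 1, 1); // stride 1, pad 1
    return CLDepthwiseConvolutionLayer3x3NCHWKernel::validate(&input, &weights, &biases, &output, conv_info);
}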
+ */ +#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H +#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H + +#include "src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor when the data layout is NHWC. + */ +class CLDepthwiseConvolutionLayer3x3NHWCKernel : public ICLDepthwiseConvolutionLayer3x3Kernel +{ +public: + /** Default constructor */ + CLDepthwiseConvolutionLayer3x3NHWCKernel(); + /** Default move assignment operator. */ + /** Initialize the function's source, destination, conv and border_size. + * + * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. + * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + */ + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), + const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override; + /** Initialize the function's source, destination, conv and border_size. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. + * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. 
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), + const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override; + /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel + * + * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor info. A 3D tensor with dimensions [IFM, 3, 3]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. + * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[in] output Destination tensor info. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor info for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). 
Supported data types: S32 + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), + const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + unsigned int _num_planes_processed_per_iteration; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H */ diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp index 4580968d38..748f4a3848 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp @@ -21,11 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" @@ -33,6 +32,7 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "src/core/CL/CLValidate.h" +#include "src/core/CL/ICLKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h new file mode 100644 index 0000000000..325f4e7067 --- /dev/null +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
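Across the 3x3 kernels above and the native kernel that follows, depth_multiplier has one meaning: the output depth is the input depth multiplied by it, while the spatial dimensions follow the usual convolution output-size arithmetic. A standalone sketch of that arithmetic with illustrative values:

#include <cassert>

struct DWShape { unsigned int w, h, c; };

// Usual convolution output-size arithmetic per spatial dimension, plus the
// depth_multiplier rule stated by the kernels in this file:
// output depth = input depth * depth_multiplier.
static unsigned int out_dim(unsigned int in, unsigned int k, unsigned int stride,
                            unsigned int pad, unsigned int dilation)
{
    return (in + 2 * pad - dilation * (k - 1) - 1) / stride + 1;
}

static DWShape depthwise_output_shape(DWShape in, unsigned int k, unsigned int stride,
                                      unsigned int pad, unsigned int dilation,
                                      unsigned int depth_multiplier)
{
    return { out_dim(in.w, k, stride, pad, dilation),
             out_dim(in.h, k, stride, pad, dilation),
             in.c * depth_multiplier };
}

int main()
{
    // 3x3 depthwise over a 16x16x32 input, stride 1, pad 1, depth_multiplier 2.
    const DWShape out = depthwise_output_shape({ 16, 16, 32 }, 3, 1, 1, 1, 2);
    assert(out.w == 16 && out.h == 16 && out.c == 64);
    return 0;
}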
+ */ +#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H +#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include "arm_compute/core/KernelDescriptors.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run a MxN depthwise convolution. M and N are respectively the rows and columns of the filter + This kernel assumes that tensor for the weights is NOT reshaped (Native version) */ +class CLDepthwiseConvolutionLayerNativeKernel : public ICLKernel +{ +public: + /** Default Constructor */ + CLDepthwiseConvolutionLayerNativeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseConvolutionLayerNativeKernel(const CLDepthwiseConvolutionLayerNativeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseConvolutionLayerNativeKernel &operator=(const CLDepthwiseConvolutionLayerNativeKernel &) = delete; + /** Allow instances of this class to be moved */ + CLDepthwiseConvolutionLayerNativeKernel(CLDepthwiseConvolutionLayerNativeKernel &&) = default; + /** Allow instances of this class to be moved */ + CLDepthwiseConvolutionLayerNativeKernel &operator=(CLDepthwiseConvolutionLayerNativeKernel &&) = default; + /** Initialize the function's source, destination and parameters + * + * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC + * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, N, M]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8. + * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread + * @param[in] dwc_info Depthwise convolution layer info + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + */ + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info, + const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U), + const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); + /** Initialize the function's source, destination and parameters + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. 
Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC + * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, N, M]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8. + * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread + * @param[in] dwc_info Depthwise convolution layer info + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info, + const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U), + const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); + /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerNativeKernel + * + * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC + * @param[in] weights Weights tensor info. A 3D tensor with dimensions [IFM, N, M]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8. + * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[in] output Destination tensor info. Data type supported: Same as @p input. + * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread + * @param[in] dwc_info Depthwise convolution layer info + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. 
In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const DWCWeightsKernelInfo &dwc_weights_info, + const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U), + const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_weights; + const ICLTensor *_biases; + ICLTensor *_output; + unsigned int _depth_multiplier; + const ICLTensor *_output_multipliers; + const ICLTensor *_output_shifts; + bool _is_quantized; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H */ diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp index 0ff3c520ba..b10c23bde9 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp @@ -21,11 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" @@ -33,6 +32,7 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/AccessWindowStatic.h" #include "src/core/CL/CLValidate.h" +#include "src/core/CL/ICLKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h new file mode 100644 index 0000000000..650fe9a11b --- /dev/null +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H +#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to reshape the weights of depthwise convolution. */ +class CLDepthwiseConvolutionLayerReshapeWeightsKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDepthwiseConvolutionLayerReshapeWeightsKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseConvolutionLayerReshapeWeightsKernel(const CLDepthwiseConvolutionLayerReshapeWeightsKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseConvolutionLayerReshapeWeightsKernel &operator=(const CLDepthwiseConvolutionLayerReshapeWeightsKernel &) = delete; + /** Default Move Constructor. */ + CLDepthwiseConvolutionLayerReshapeWeightsKernel(CLDepthwiseConvolutionLayerReshapeWeightsKernel &&) = default; + /** Default move assignment operator */ + CLDepthwiseConvolutionLayerReshapeWeightsKernel &operator=(CLDepthwiseConvolutionLayerReshapeWeightsKernel &&) = default; + + /** Initialize the function's source and destination. + * + * @param[in] input The input tensor of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC + * @param[out] output The output tensor of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p weights. + * @param[in] info Depthwise convolution information to reshape the input tensor. + */ + void configure(const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info); + /** Initialize the function's source and destination. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC + * @param[out] output The output tensor of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p weights. + * @param[in] info Depthwise convolution information to reshape the input tensor. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info); + + /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel + * + * @param[in] input The input tensor info of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC + * @param[in] output The output tensor info of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p weights. + * @param[in] info Depthwise convolution information to reshape the input tensor. 
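The reshape documented above is again pure shape arithmetic: [IFM, W, H] becomes [W * H * C0, ceil(IFM / C0)]. A standalone sketch with an illustrative C0 (in the library the value comes from the DepthwiseConvolutionReshapeInfo argument):

#include <cassert>

struct ReshapedDims { unsigned int x, y; };

// Output of the weights reshape documented above:
// [IFM, W, H] -> [W * H * C0, ceil(IFM / C0)], C0 = channels read per thread.
static ReshapedDims reshape_weights_dims(unsigned int ifm, unsigned int w, unsigned int h, unsigned int c0)
{
    return { w * h * c0, (ifm + c0 - 1) / c0 };
}

int main()
{
    // 3x3 depthwise weights for 20 channels, 4 channels per thread (example value).
    const ReshapedDims d = reshape_weights_dims(20, 3, 3, 4);
    assert(d.x == 36 && d.y == 5);
    return 0;
}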
+ * + * @return a Status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const DepthwiseConvolutionReshapeInfo &info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + + void configure_dot_product(const DepthwiseConvolutionReshapeInfo &info); + void configure_generic(const DepthwiseConvolutionReshapeInfo &info); +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H */ diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp index e2c49fbf66..3723c651fe 100644 --- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h" +#include "src/core/CL/kernels/CLDequantizationLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.h b/src/core/CL/kernels/CLDequantizationLayerKernel.h new file mode 100644 index 0000000000..5579b5bc71 --- /dev/null +++ b/src/core/CL/kernels/CLDequantizationLayerKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H +#define ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the dequantization layer kernel. */ +class CLDequantizationLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDequantizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDequantizationLayerKernel(const CLDequantizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDequantizationLayerKernel &operator=(const CLDequantizationLayerKernel &) = delete; + /** Default Move Constructor. 
*/ + CLDequantizationLayerKernel(CLDequantizationLayerKernel &&) = default; + /** Default move assignment operator */ + CLDequantizationLayerKernel &operator=(CLDequantizationLayerKernel &&) = default; + /** Default destructor */ + ~CLDequantizationLayerKernel() = default; + /** Set the input, output, min and max. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[out] output Destination tensor. Data types supported: F16/F32. + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Set the input, output, min and max. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[out] output Destination tensor. Data types supported: F16/F32. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayerKernel + * + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[in] output Output tensor info. Data types supported: F16/F32. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLDerivativeKernel.cpp b/src/core/CL/kernels/CLDerivativeKernel.cpp index 659a7cb209..5ff11362cc 100644 --- a/src/core/CL/kernels/CLDerivativeKernel.cpp +++ b/src/core/CL/kernels/CLDerivativeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLDerivativeKernel.h" +#include "src/core/CL/kernels/CLDerivativeKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLDerivativeKernel.h b/src/core/CL/kernels/CLDerivativeKernel.h new file mode 100644 index 0000000000..14dd05d084 --- /dev/null +++ b/src/core/CL/kernels/CLDerivativeKernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLDERIVATIVEKERNEL_H +#define ARM_COMPUTE_CLDERIVATIVEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the derivative kernel. */ +class CLDerivativeKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDerivativeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDerivativeKernel(const CLDerivativeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDerivativeKernel &operator=(const CLDerivativeKernel &) = delete; + /** Allow instances of this class to be moved */ + CLDerivativeKernel(CLDerivativeKernel &&) = default; + /** Allow instances of this class to be moved */ + CLDerivativeKernel &operator=(CLDerivativeKernel &&) = default; + /** Default destructor */ + ~CLDerivativeKernel() = default; + /** Initialise the kernel's sources, destination and border + * + * @note At least one of output_x or output_y must be set + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + /** Initialise the kernel's sources, destination and border + * + * @note At least one of output_x or output_y must be set + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; /**< Input tensor */ + ICLTensor *_output_x; /**< Output tensor - Derivate along the X direction */ + ICLTensor *_output_y; /**< Output tensor - Derivate along the Y direction */ + bool _run_derivative_x; /**< Do we need to run Derivative X ? */ + bool _run_derivative_y; /**< Do we need to run Derivative Y ? */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLDERIVATIVEKERNEL_H */ diff --git a/src/core/CL/kernels/CLDilateKernel.cpp b/src/core/CL/kernels/CLDilateKernel.cpp index 1e59c349e7..cac5bc1c72 100644 --- a/src/core/CL/kernels/CLDilateKernel.cpp +++ b/src/core/CL/kernels/CLDilateKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLDilateKernel.h" +#include "src/core/CL/kernels/CLDilateKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLDilateKernel.h b/src/core/CL/kernels/CLDilateKernel.h new file mode 100644 index 0000000000..591ec8ccfc --- /dev/null +++ b/src/core/CL/kernels/CLDilateKernel.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLDILATEKERNEL_H +#define ARM_COMPUTE_CLDILATEKERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the dilate kernel. + * + */ +class CLDilateKernel : public ICLSimple2DKernel +{ +public: + /**Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /**Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLDILATEKERNEL_H */ diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp index 161b221e81..a642eabc4e 100644 --- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp +++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h" +#include "src/core/CL/kernels/CLDirectConvolutionLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.h b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.h new file mode 100644 index 0000000000..5cd674f631 --- /dev/null +++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H +#define ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the direct convolution kernel. + */ +class CLDirectConvolutionLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDirectConvolutionLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDirectConvolutionLayerKernel(const CLDirectConvolutionLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDirectConvolutionLayerKernel &operator=(const CLDirectConvolutionLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLDirectConvolutionLayerKernel(CLDirectConvolutionLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLDirectConvolutionLayerKernel &operator=(CLDirectConvolutionLayerKernel &&) = default; + /** Default destructor */ + ~CLDirectConvolutionLayerKernel() = default; + /** Set the input, weights, biases and output tensors. + * + * @note: DirectConvolution only works in the following configurations: + * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 + * 3x3 convolution with stride_x = 1/2, stride_y = 1/2 + * 5x5 convolution with stride_x = 1/2, stride_y = 1/2 + * 9x9 convolution with stride_x = 1/2, stride_y = 1/2 + * + * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. 
+ * The 3rd dimension must be the same as the input's volume 3rd dimension. + * Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Biases are 1D tensor with dimension [OFM]. + * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type + * @param[out] output Output tensor. + * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + */ + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info); + /** Set the input, weights, biases and output tensors. + * + * @note: DirectConvolution only works in the following configurations: + * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 + * 3x3 convolution with stride_x = 1/2, stride_y = 1/2 + * 5x5 convolution with stride_x = 1/2, stride_y = 1/2 + * 9x9 convolution with stride_x = 1/2, stride_y = 1/2 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * The 3rd dimension must be the same as the input's volume 3rd dimension. + * Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Biases are 1D tensor with dimension [OFM]. + * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type + * @param[out] output Output tensor. + * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLDirectConvolutionLayerKernel + * + * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * The 3rd dimension must be the same as the input's volume 3rd dimension. + * Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Biases are 1D tensor with dimension [OFM]. + * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type. + * @param[in] output Output tensor. + * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] target Target GPU architecture. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const GPUTarget target); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +public: + const ICLTensor *_input; + const ICLTensor *_biases; + const ICLTensor *_weights; + ICLTensor *_output; + DataLayout _data_layout; + BorderSize _border_size; + int _conv_stride_x; + int _conv_stride_y; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp index bff0db07a0..38a7f1bae1 100644 --- a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp +++ b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h" +#include "src/core/CL/kernels/CLElementWiseUnaryLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.h b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.h new file mode 100644 index 0000000000..95b5872796 --- /dev/null +++ b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H +#define ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" +#include "src/core/CL/ICLSimpleKernel.h" + +namespace arm_compute +{ +/** Interface for the elementwise unary operator */ +class CLElementWiseUnaryLayerKernel : public ICLKernel +{ +public: + /** Initialise the kernel's inputs, output. + * + * @param[in] input First tensor input info. Data types supported: F16/F32. + * @param[out] output Output tensor info. Data types supported: Same as @p input. + * @param[in] op Element wise unary operation to perform. + */ + void configure(const ITensorInfo *input, ITensorInfo *output, const ElementWiseUnary &op); + /** Initialise the kernel's inputs, output. 
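// ---------------------------------------------------------------------------
// Illustrative sketch for CLDirectConvolutionLayerKernel::validate() (not part
// of the patch). It checks one of the supported configurations listed in the
// direct convolution documentation above: a 3x3 kernel with stride 1. All
// shapes are NCHW and purely hypothetical; the GPU target would normally come
// from CLScheduler::get().target().
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLDirectConvolutionLayerKernel.h"

arm_compute::Status example_validate_direct_conv()
{
    using namespace arm_compute;

    const TensorInfo src(TensorShape(32U, 32U, 3U), 1, DataType::F32);        // [W, H, IFM]
    const TensorInfo weights(TensorShape(3U, 3U, 3U, 8U), 1, DataType::F32);  // [kernel_x, kernel_y, IFM, OFM]
    const TensorInfo biases(TensorShape(8U), 1, DataType::F32);               // [OFM]
    const TensorInfo dst(TensorShape(32U, 32U, 8U), 1, DataType::F32);        // same spatial size: stride 1, pad 1
    const PadStrideInfo conv_info(1, 1, 1, 1);                                // stride_x, stride_y, pad_x, pad_y

    return CLDirectConvolutionLayerKernel::validate(&src, &weights, &biases, &dst, conv_info, GPUTarget::MIDGARD);
}
// ---------------------------------------------------------------------------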
+ * + * @param[in] compile_context The compile context to be used. + * @param[in] input First tensor input info. Data types supported: F16/F32. + * @param[out] output Output tensor info. Data types supported: Same as @p input. + * @param[in] op Element wise unary operation to perform. + */ + void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, const ElementWiseUnary &op); + /** Static function to check if given info will lead to a valid configuration of @ref CLElementWiseUnaryLayerKernel + * + * @param[in] input First tensor input info. Data types supported: F16/F32. + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * @param[in] op Element wise unary operation to perform. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ElementWiseUnary &op); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp index da28f3d886..896ee119c1 100644 --- a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp +++ b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h" +#include "src/core/CL/kernels/CLElementwiseOperationKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.h b/src/core/CL/kernels/CLElementwiseOperationKernel.h new file mode 100644 index 0000000000..75030cf3a3 --- /dev/null +++ b/src/core/CL/kernels/CLElementwiseOperationKernel.h @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H +#define ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for an element-wise operation kernel + * + * Element-wise operation is computed by: + * @f[ output(x,y) = OP(input1(x,y), input2(x,y))@f] + * + */ +class CLElementwiseOperationKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLElementwiseOperationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLElementwiseOperationKernel(const CLElementwiseOperationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLElementwiseOperationKernel &operator=(const CLElementwiseOperationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLElementwiseOperationKernel(CLElementwiseOperationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLElementwiseOperationKernel &operator=(CLElementwiseOperationKernel &&) = default; + /** Default destructor */ + ~CLElementwiseOperationKernel() = default; + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; + +protected: + /** The name of the operation */ + virtual std::string name() = 0; + + /** Initialise the kernel's output. + * + * @param[in] input1 First tensor input info. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * + * @return a pair of Status and Window + */ + virtual std::pair validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) = 0; + + /** Generate the build options for the specific kernel + * + * @reutrn a CLBuildOptions struct + */ + virtual CLBuildOptions generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) = 0; + + /** Generate the identifier for tuning + * + * @reutrn a string + */ + virtual std::string generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) = 0; + + /** Commmon configure function for element-wise operators with no additional options (e.g., Div, Min, Max, SquaredDiff) + * + */ + void configure_common(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output); + /** Commmon configure function for element-wise operators with no additional options (e.g., Div, Min, Max, SquaredDiff) + * + */ + void configure_common(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output); + + ActivationLayerInfo _act_info; + +private: + const ITensorInfo *_input1; /**< Source tensor info 1 */ + const ITensorInfo *_input2; /**< Source tensor info 2 */ + ITensorInfo *_output; /**< Destination tensor info */ +}; + +/** Addition operation */ +class CLSaturatedArithmeticOperationKernel : public CLElementwiseOperationKernel +{ +public: + CLSaturatedArithmeticOperationKernel() + : CLElementwiseOperationKernel(), _policy(), _op() + { + } + + /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel + * + * @param[in] op Arithmetic operation to be executed. + * @param[in] input1 First tensor input info. 
Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * @param[in] policy Policy to use to handle overflow. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ConvertPolicy &policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] op Arithmetic operation to be executed. + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * @param[in] policy Policy to use to handle overflow. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const CLCompileContext &compile_context, ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ConvertPolicy &policy, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel + * + * @param[in] op Arithmetic operation to be executed. + * @param[in] input1 First tensor input info info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. + * @param[in] input2 Second tensor input info info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info info. Data types supported: Same as @p input1. + * @param[in] policy Policy to use to handle overflow. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * + * @return a Status + */ + static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ConvertPolicy &policy, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + +protected: + // Inherited methods overridden: + std::string name() override; + std::pair validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) override; + CLBuildOptions generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) override; + std::string generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) override; + +private: + ConvertPolicy _policy; + ArithmeticOperation _op; +}; + +class CLArithmeticOperationKernel : public CLElementwiseOperationKernel +{ +public: + CLArithmeticOperationKernel() + : CLElementwiseOperationKernel(), _op() + { + } + + /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel + * + * @param[in] op Arithmetic operation to be executed. + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info. 
Data types supported: Same as @p input1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] op Arithmetic operation to be executed. + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const CLCompileContext &compile_context, ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel + * + * @param[in] op Arithmetic operation to be executed. + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * + * @return a Status + */ + static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + +protected: + // Inherited methods overridden: + std::string name() override; + std::pair validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) override; + CLBuildOptions generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) override; + std::string generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) override; + +private: + ArithmeticOperation _op; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H */ diff --git a/src/core/CL/kernels/CLErodeKernel.cpp b/src/core/CL/kernels/CLErodeKernel.cpp index 29a32979a3..f6d98a5488 100644 --- a/src/core/CL/kernels/CLErodeKernel.cpp +++ b/src/core/CL/kernels/CLErodeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLErodeKernel.h" +#include "src/core/CL/kernels/CLErodeKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLErodeKernel.h b/src/core/CL/kernels/CLErodeKernel.h new file mode 100644 index 0000000000..4da97ae358 --- /dev/null +++ b/src/core/CL/kernels/CLErodeKernel.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
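// ---------------------------------------------------------------------------
// Illustrative sketch for CLSaturatedArithmeticOperationKernel (not part of
// the patch). Unlike the older kernels, this one is configured on ITensorInfo
// objects; the real tensors are bound later through an ITensorPack when the
// operator layer calls run_op(). Shapes and names are hypothetical.
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLElementwiseOperationKernel.h"

void example_saturated_add()
{
    using namespace arm_compute;

    TensorInfo a(TensorShape(8U, 8U), 1, DataType::S16);
    TensorInfo b(TensorShape(8U, 8U), 1, DataType::S16);
    TensorInfo c(TensorShape(8U, 8U), 1, DataType::S16);

    ARM_COMPUTE_ERROR_THROW_ON(CLSaturatedArithmeticOperationKernel::validate(
        ArithmeticOperation::ADD, &a, &b, &c, ConvertPolicy::SATURATE));

    CLSaturatedArithmeticOperationKernel add_kernel;
    add_kernel.configure(ArithmeticOperation::ADD, &a, &b, &c, ConvertPolicy::SATURATE);

    // At run time the caller packs the actual tensors (e.g. under the
    // ACL_SRC_0 / ACL_SRC_1 / ACL_DST ids) and enqueues the kernel with that pack.
}
// ---------------------------------------------------------------------------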
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLERODEKERNEL_H +#define ARM_COMPUTE_CLERODEKERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the erode kernel. + * + */ +class CLErodeKernel : public ICLSimple2DKernel +{ +public: + /**Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /**Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLERODEKERNEL_H */ diff --git a/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp b/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp index 0478f550f9..922e50aa73 100644 --- a/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp +++ b/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h" +#include "src/core/CL/kernels/CLFFTDigitReverseKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLFFTDigitReverseKernel.h b/src/core/CL/kernels/CLFFTDigitReverseKernel.h new file mode 100644 index 0000000000..2e2f1bdff4 --- /dev/null +++ b/src/core/CL/kernels/CLFFTDigitReverseKernel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H +#define ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include "arm_compute/core/KernelDescriptors.h" + +namespace arm_compute +{ +// Forward declarations +class ICLTensor; + +/** Interface for the digit reverse operation kernel. */ +class CLFFTDigitReverseKernel : public ICLKernel +{ +public: + /** Constructor */ + CLFFTDigitReverseKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFFTDigitReverseKernel(const CLFFTDigitReverseKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFFTDigitReverseKernel &operator=(const CLFFTDigitReverseKernel &) = delete; + /** Default Move Constructor. */ + CLFFTDigitReverseKernel(CLFFTDigitReverseKernel &&) = default; + /** Default move assignment operator */ + CLFFTDigitReverseKernel &operator=(CLFFTDigitReverseKernel &&) = default; + /** Default destructor */ + ~CLFFTDigitReverseKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] idx Digit reverse index tensor. Data type supported: U32 + * @param[in] config Kernel configuration. + */ + void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] idx Digit reverse index tensor. Data type supported: U32 + * @param[in] config Kernel configuration. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config); + /** Static function to check if given info will lead to a valid configuration of @ref CLFFTDigitReverseKernel + * + * @param[in] input Source tensor info. Data types supported: F32. + * @param[in] output Destination tensor info. Data type supported: same as @p input + * @param[in] idx Digit reverse index tensor info. Data type supported: U32 + * @param[in] config Kernel configuration. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + const ICLTensor *_idx; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H */ diff --git a/src/core/CL/kernels/CLFFTRadixStageKernel.cpp b/src/core/CL/kernels/CLFFTRadixStageKernel.cpp index 7b17a227e1..0f06640b64 100644 --- a/src/core/CL/kernels/CLFFTRadixStageKernel.cpp +++ b/src/core/CL/kernels/CLFFTRadixStageKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h" +#include "src/core/CL/kernels/CLFFTRadixStageKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLFFTRadixStageKernel.h b/src/core/CL/kernels/CLFFTRadixStageKernel.h new file mode 100644 index 0000000000..c3cc510bdd --- /dev/null +++ b/src/core/CL/kernels/CLFFTRadixStageKernel.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H +#define ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include "arm_compute/core/KernelDescriptors.h" + +#include + +namespace arm_compute +{ +// Forward declarations +class ICLTensor; + +/** Interface for the FFT radix stage kernel. */ +class CLFFTRadixStageKernel : public ICLKernel +{ +public: + /** Constructor */ + CLFFTRadixStageKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFFTRadixStageKernel(const CLFFTRadixStageKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFFTRadixStageKernel &operator=(const CLFFTRadixStageKernel &) = delete; + /** Default Move Constructor. */ + CLFFTRadixStageKernel(CLFFTRadixStageKernel &&) = default; + /** Default move assignment operator */ + CLFFTRadixStageKernel &operator=(CLFFTRadixStageKernel &&) = default; + /** Default destructor */ + ~CLFFTRadixStageKernel() = default; + /** Set the input and output tensors. 
+ * + * @note If the output tensor is nullptr, the FFT will be performed in-place + * + * @param[in,out] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. Can be nullptr. Data type supported: same as @p input + * @param[in] config FFT descriptor metadata. + */ + void configure(ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config); + /** Set the input and output tensors. + * + * @note If the output tensor is nullptr, the FFT will be performed in-place + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. Can be nullptr. Data type supported: same as @p input + * @param[in] config FFT descriptor metadata. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config); + /** Static function to check if given info will lead to a valid configuration of @ref CLFFTRadixStageKernel + * + * @param[in] input Source tensor info. Data types supported: F32. + * @param[in] output Destination tensor info. Can be nullptr. Data type supported: same as @p input + * @param[in] config FFT descriptor metadata. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelInfo &config); + /** Returns the radix that are support by the FFT kernel + * + * @return A set of supported radix + */ + static std::set supported_radix(); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_input; + ICLTensor *_output; + bool _run_in_place; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H */ diff --git a/src/core/CL/kernels/CLFFTScaleKernel.cpp b/src/core/CL/kernels/CLFFTScaleKernel.cpp index 49fcbb6c7b..4dbe8d2e86 100644 --- a/src/core/CL/kernels/CLFFTScaleKernel.cpp +++ b/src/core/CL/kernels/CLFFTScaleKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h" +#include "src/core/CL/kernels/CLFFTScaleKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLFFTScaleKernel.h b/src/core/CL/kernels/CLFFTScaleKernel.h new file mode 100644 index 0000000000..cb007e5307 --- /dev/null +++ b/src/core/CL/kernels/CLFFTScaleKernel.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
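// ---------------------------------------------------------------------------
// Illustrative sketch for CLFFTRadixStageKernel (not part of the patch). The
// FFT functions (CLFFT1D/CLFFT2D) normally decompose a transform into these
// stages; here one in-place radix-4 stage is configured by hand. The buffer is
// assumed to hold interleaved complex values as a 2-channel F32 tensor, which
// is how the FFT functions lay it out.
#include <set>

#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLFFTRadixStageKernel.h"

void example_fft_radix_stage()
{
    using namespace arm_compute;

    // Radices the kernel can handle (e.g. 2, 3, 4, 5, 7, 8).
    const std::set<unsigned int> radices = CLFFTRadixStageKernel::supported_radix();
    (void)radices;

    CLTensor buf;
    buf.allocator()->init(TensorInfo(TensorShape(64U), 2, DataType::F32)); // 64 complex samples

    FFTRadixStageKernelInfo cfg{};
    cfg.axis           = 0;
    cfg.radix          = 4;
    cfg.Nx             = 1;    // butterfly span for the first stage
    cfg.is_first_stage = true;

    CLFFTRadixStageKernel stage;
    stage.configure(&buf, nullptr, cfg); // nullptr output -> in-place, as documented above
}
// ---------------------------------------------------------------------------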
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLFFTSCALEKERNEL_H +#define ARM_COMPUTE_CLFFTSCALEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include "arm_compute/core/KernelDescriptors.h" + +namespace arm_compute +{ +// Forward declarations +class ICLTensor; + +/** Interface for the inverse fft scale kernel. */ +class CLFFTScaleKernel : public ICLKernel +{ +public: + /** Constructor */ + CLFFTScaleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFFTScaleKernel(const CLFFTScaleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFFTScaleKernel &operator=(const CLFFTScaleKernel &) = delete; + /** Default Move Constructor. */ + CLFFTScaleKernel(CLFFTScaleKernel &&) = default; + /** Default move assignment operator */ + CLFFTScaleKernel &operator=(CLFFTScaleKernel &&) = default; + /** Default destructor */ + ~CLFFTScaleKernel() = default; + /** Set the input and output tensors. + * + * @param[in,out] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] config Kernel configuration + */ + void configure(ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] config Kernel configuration + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config); + /** Static function to check if given info will lead to a valid configuration of @ref CLFFTScaleKernel + * + * @param[in] input Source tensor info. Data types supported: F32. + * @param[in] output Destination tensor info. Data type supported: same as @p input + * @param[in] config Kernel configuration + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTScaleKernelInfo &config); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_input; + ICLTensor *_output; + bool _run_in_place; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLFFTSCALEKERNEL_H */ diff --git a/src/core/CL/kernels/CLFastCornersKernel.cpp b/src/core/CL/kernels/CLFastCornersKernel.cpp index ebdfd2741f..7481fd1c27 100644 --- a/src/core/CL/kernels/CLFastCornersKernel.cpp +++ b/src/core/CL/kernels/CLFastCornersKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h" +#include "src/core/CL/kernels/CLFastCornersKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLFastCornersKernel.h b/src/core/CL/kernels/CLFastCornersKernel.h new file mode 100644 index 0000000000..0c1b564c2f --- /dev/null +++ b/src/core/CL/kernels/CLFastCornersKernel.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLFASTCORNERSKERNEL_H +#define ARM_COMPUTE_CLFASTCORNERSKERNEL_H + +#include "arm_compute/core/CL/ICLArray.h" +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +#include + +namespace cl +{ +class Buffer; +} + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** CL kernel to perform fast corners */ +class CLFastCornersKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLFastCornersKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFastCornersKernel(const CLFastCornersKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFastCornersKernel &operator=(const CLFastCornersKernel &) = delete; + /** Allow instances of this class to be moved */ + CLFastCornersKernel(CLFastCornersKernel &&) = default; + /** Allow instances of this class to be moved */ + CLFastCornersKernel &operator=(CLFastCornersKernel &&) = default; + /** Default destructor */ + ~CLFastCornersKernel() = default; + + /** Initialise the kernel. + * + * @param[in] input Source image. Data types supported: U8. + * @param[out] output Output image. Data types supported: U8. + * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. + * @param[in] non_max_suppression True if non-maxima suppresion is applied, false otherwise. + * @param[in] border_mode Strategy to use for borders. + */ + void configure(const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode); + /** Initialise the kernel. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source image. Data types supported: U8. + * @param[out] output Output image. Data types supported: U8. + * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. + * @param[in] non_max_suppression True if non-maxima suppresion is applied, false otherwise. + * @param[in] border_mode Strategy to use for borders. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode); + + // Inherited methods overridden + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLImage *_input; + ICLImage *_output; +}; + +/** CL kernel to copy keypoints information to ICLKeyPointArray and counts the number of key points */ +class CLCopyToArrayKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLCopyToArrayKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLCopyToArrayKernel(const CLCopyToArrayKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLCopyToArrayKernel &operator=(const CLCopyToArrayKernel &) = delete; + /** Allow instances of this class to be moved */ + CLCopyToArrayKernel(CLCopyToArrayKernel &&) = default; + /** Allow instances of this class to be moved */ + CLCopyToArrayKernel &operator=(CLCopyToArrayKernel &&) = default; + /** Default destructor */ + ~CLCopyToArrayKernel() = default; + + /** Initialise the kernel. + * + * @param[in] input Source image. Data types supported: U8. + * @param[in] update_number Flag to indicate whether we need to update the number of corners + * @param[out] corners Array of keypoints to store the results. + * @param[out] num_buffers Number of keypoints to store the results. + */ + void configure(const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers); + /** Initialise the kernel. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source image. Data types supported: U8. + * @param[in] update_number Flag to indicate whether we need to update the number of corners + * @param[out] corners Array of keypoints to store the results. + * @param[out] num_buffers Number of keypoints to store the results. + */ + void configure(const CLCompileContext &compile_context, const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLImage *_input; /**< source image */ + ICLKeyPointArray *_corners; /**< destination array */ + cl::Buffer *_num_buffer; /**< CL memory to record number of key points in the array */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLFASTCORNERSKERNEL_H */ diff --git a/src/core/CL/kernels/CLFillBorderKernel.cpp b/src/core/CL/kernels/CLFillBorderKernel.cpp index e92619a242..5d77c291d7 100644 --- a/src/core/CL/kernels/CLFillBorderKernel.cpp +++ b/src/core/CL/kernels/CLFillBorderKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLFillBorderKernel.h b/src/core/CL/kernels/CLFillBorderKernel.h new file mode 100644 index 0000000000..7951f48171 --- /dev/null +++ b/src/core/CL/kernels/CLFillBorderKernel.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
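// ---------------------------------------------------------------------------
// Illustrative sketch for CLFastCornersKernel and CLCopyToArrayKernel (not
// part of the patch). It mirrors, in simplified form, how the CLFastCorners
// runtime function chains the two kernels: the first writes a corner-score
// image, the second gathers the surviving points into a keypoint array plus a
// counter buffer. Sizes, the threshold and the array capacity are hypothetical.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLArray.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLFastCornersKernel.h"

void example_fast_corners()
{
    using namespace arm_compute;

    CLTensor img, scores; // ICLImage is an alias of ICLTensor, so plain CLTensors work here
    img.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));
    scores.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));

    CLFastCornersKernel fast;
    fast.configure(&img, &scores, 20.0f, /* non_max_suppression = */ true, BorderMode::UNDEFINED);

    CLKeyPointArray corners(10000); // capacity for the detected keypoints
    cl::Buffer      num_corners(CLScheduler::get().context(), CL_MEM_READ_WRITE, sizeof(unsigned int));

    CLCopyToArrayKernel copy;
    copy.configure(&scores, /* update_number = */ true, &corners, &num_corners);
}
// ---------------------------------------------------------------------------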
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLFILLBORDERKERNEL_H +#define ARM_COMPUTE_CLFILLBORDERKERNEL_H + +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for filling the border of a kernel */ +class CLFillBorderKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLFillBorderKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFillBorderKernel(const CLFillBorderKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFillBorderKernel &operator=(const CLFillBorderKernel &) = delete; + /** Allow instances of this class to be moved */ + CLFillBorderKernel(CLFillBorderKernel &&) = default; + /** Allow instances of this class to be moved */ + CLFillBorderKernel &operator=(CLFillBorderKernel &&) = default; + /** Default destructor */ + ~CLFillBorderKernel() = default; + + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] tensor Tensor to process Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32. + * @param[in] border_size Size of the border to fill in elements. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + /** Initialise the kernel's input, output and border mode. + * + * @param[in,out] tensor Tensor to process Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32. + * @param[in] border_size Size of the border to fill in elements. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. 
+ * @param[in,out] tensor Tensor to process Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32.
+ * @param[in] border_size Size of the border to fill in elements.
+ * @param[in] border_mode Border mode to use for the convolution.
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
+
+ /** Function to set the constant value on fill border kernel depending on type.
+ *
+ * @param[in] idx Index of the kernel argument to set.
+ * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT.
+ */
+ template <class T>
+ void set_constant_border(unsigned int idx, const PixelValue &constant_border_value);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ bool is_parallelisable() const override;
+
+private:
+ ICLTensor *_tensor;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLFILLBORDERKERNEL_H */
diff --git a/src/core/CL/kernels/CLFlattenLayerKernel.cpp b/src/core/CL/kernels/CLFlattenLayerKernel.cpp
index 590fcee6fd..b3f84b6928 100644
--- a/src/core/CL/kernels/CLFlattenLayerKernel.cpp
+++ b/src/core/CL/kernels/CLFlattenLayerKernel.cpp
@@ -21,7 +21,7 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
-#include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h"
+#include "src/core/CL/kernels/CLFlattenLayerKernel.h"
 #include "arm_compute/core/CL/ICLTensor.h"
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
diff --git a/src/core/CL/kernels/CLFlattenLayerKernel.h b/src/core/CL/kernels/CLFlattenLayerKernel.h
new file mode 100644
index 0000000000..2471cf2e4a
--- /dev/null
+++ b/src/core/CL/kernels/CLFlattenLayerKernel.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ +#ifndef ARM_COMPUTE_CLFLATTENLAYERKERNEL_H +#define ARM_COMPUTE_CLFLATTENLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL interface for the flatten kernel.*/ +class CLFlattenLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLFlattenLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFlattenLayerKernel(const CLFlattenLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFlattenLayerKernel &operator=(const CLFlattenLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLFlattenLayerKernel(CLFlattenLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLFlattenLayerKernel &operator=(CLFlattenLayerKernel &&) = default; + /** Set the input and output of the kernel. + * + * @param[in] input First input tensor to flatten with at least 3 dimensions. + * The dimensions above the third will be interpreted as batches. Data types supported: All. + * @param[out] output Output tensor with shape [w*h*d, input_batches] where: + * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Set the input and output of the kernel. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input First input tensor to flatten with at least 3 dimensions. + * The dimensions above the third will be interpreted as batches. Data types supported: All. + * @param[out] output Output tensor with shape [w*h*d, input_batches] where: + * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLFlattenLayerKernel + * + * @param[in] input First input tensor to flatten with at least 3 dimensions. + * The dimensions above the third will be interpreted as batches. Data types supported: All. + * @param[out] output Output tensor with shape [w*h*d, input_batches] where: + * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +public: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLFLATTENLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLFloorKernel.cpp b/src/core/CL/kernels/CLFloorKernel.cpp index 8884f3fe36..2af0089bf0 100644 --- a/src/core/CL/kernels/CLFloorKernel.cpp +++ b/src/core/CL/kernels/CLFloorKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLFloorKernel.h" +#include "src/core/CL/kernels/CLFloorKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLFloorKernel.h b/src/core/CL/kernels/CLFloorKernel.h new file mode 100644 index 0000000000..f5635141e4 --- /dev/null +++ b/src/core/CL/kernels/CLFloorKernel.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLFLOORKERNEL_H +#define ARM_COMPUTE_CLFLOORKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform a floor operation */ +class CLFloorKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLFloorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFloorKernel(const CLFloorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFloorKernel &operator=(const CLFloorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLFloorKernel(CLFloorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLFloorKernel &operator=(CLFloorKernel &&) = default; + /** Default destructor */ + ~CLFloorKernel() = default; + /** Set the source, destination of the kernel + * + * @param[in] input Source tensor. Data type supported: F16/F32. + * @param[out] output Destination tensor. Same as @p input + */ + void configure(const ICLTensor *input, ICLTensor *output); + + /** Set the source, destination of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data type supported: F16/F32. + * @param[out] output Destination tensor. Same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); + + /** Static function to check if given info will lead to a valid configuration of @ref CLFloorKernel + * + * @param[in] input Source tensor info. Data type supported: F16/F32. + * @param[in] output Destination tensor info. 
Same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLFLOORKERNEL_H */ diff --git a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp index 357231940b..2116239080 100644 --- a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp +++ b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h" +#include "src/core/CL/kernels/CLFuseBatchNormalizationKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.h b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.h new file mode 100644 index 0000000000..78b1e74cab --- /dev/null +++ b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.h @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H +#define ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ICLTensor; + +/** OpenCL kernel to fuse the batch normalization node to a preceding convolution node */ +class CLFuseBatchNormalizationKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLFuseBatchNormalizationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFuseBatchNormalizationKernel(const CLFuseBatchNormalizationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFuseBatchNormalizationKernel &operator=(const CLFuseBatchNormalizationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLFuseBatchNormalizationKernel(CLFuseBatchNormalizationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLFuseBatchNormalizationKernel &operator=(CLFuseBatchNormalizationKernel &&) = default; + /** Default destructor */ + ~CLFuseBatchNormalizationKernel() = default; + /** Set the source, destination of the kernel + * + * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC + * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights + * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights + * @param[out] fused_weights Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights + * @param[out] fused_bias Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights + * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights + * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights + * @note if nullptr, bn_beta is set to 0.0 + * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights + * @note if nullptr, bn_gamma is set to 1.0 + * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. + * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. + */ + void configure(const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias, + const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr, + float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); + /** Set the source, destination of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC + * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights + * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights + * @param[out] fused_weights Output fused weights tensor. 
It can be a nullptr in case of in-place computation. Same as @p input_weights + * @param[out] fused_bias Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights + * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights + * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights + * @note if nullptr, bn_beta is set to 0.0 + * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights + * @note if nullptr, bn_gamma is set to 1.0 + * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. + * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias, + const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr, + float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); + /** Static function to check if given info will lead to a valid configuration of @ref CLFuseBatchNormalizationKernel + * + * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC + * @param[in] bn_mean Batch normalization layer mean tensor info. Same as @p input_weights + * @param[in] bn_var Batch normalization layer variance tensor info. Same as @p input_weights + * @param[in] fused_weights Output fused weights tensor info. It can be a nullptr in case of in-place computation. Same as @p input_weights + * @param[in] fused_bias Output fused bias tensor info. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights + * @param[in] input_bias (Optional) Input bias tensor info for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights + * @param[in] bn_beta (Optional) Batch normalization layer beta tensor info. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights + * @note if nullptr, bn_beta is set to 0.0 + * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor info. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights + * @note if nullptr, bn_gamma is set to 1.0 + * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. + * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. 
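+ *
+ * @note A minimal usage sketch, illustrative only: `weights`, `mean`, `var`, `fused_w` and
+ *       `fused_b` are assumed to be already-initialised F16/F32 CL tensors of compatible shapes
+ *       and matching data type.
+ * @code
+ * CLFuseBatchNormalizationKernel fuse_bn;
+ * ARM_COMPUTE_ERROR_THROW_ON(CLFuseBatchNormalizationKernel::validate(weights.info(), mean.info(), var.info(),
+ *                                                                     fused_w.info(), fused_b.info()));
+ * fuse_bn.configure(&weights, &mean, &var, &fused_w, &fused_b);
+ * CLScheduler::get().enqueue(fuse_bn);
+ * @endcode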
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var, + const ITensorInfo *fused_weights, const ITensorInfo *fused_bias, + const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr, + float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input_weights; + const ICLTensor *_input_bias; + const ICLTensor *_bn_mean; + const ICLTensor *_bn_var; + const ICLTensor *_bn_gamma; + const ICLTensor *_bn_beta; + ICLTensor *_fused_weights; + ICLTensor *_fused_bias; + float _epsilon; + bool _run_in_place_weights; + bool _run_in_place_bias; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H */ diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp index af7755b4e4..1f89865908 100644 --- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h new file mode 100644 index 0000000000..125f0c6948 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H +#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to multiply matrices with QASYMM8/QASYMM8_SIGNED data type */ +class CLGEMMLowpMatrixMultiplyNativeKernel : public ICLKernel +{ +public: + /** Default Constructor */ + CLGEMMLowpMatrixMultiplyNativeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMLowpMatrixMultiplyNativeKernel(const CLGEMMLowpMatrixMultiplyNativeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMLowpMatrixMultiplyNativeKernel &operator=(const CLGEMMLowpMatrixMultiplyNativeKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMLowpMatrixMultiplyNativeKernel(CLGEMMLowpMatrixMultiplyNativeKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMLowpMatrixMultiplyNativeKernel &operator=(CLGEMMLowpMatrixMultiplyNativeKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 + * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same as lhs_info.k0 + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 + * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same as lhs_info.k0 + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, + const GEMMReshapeInfo &gemm_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyNativeKernel + * + * @param[in] input0 Input tensor info for the LHS matrix. 
Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] input1 Input tensor info for the RHS matrix. Data type supported: same as @p input0 + * @param[in] output Output tensor info. Data type supported: S32 + * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same as lhs_info.k0 + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, + const GEMMReshapeInfo &gemm_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + ICLTensor *_output; + bool _slide_matrix_b; + bool _reinterpret_input_as_3d; + bool _reinterpret_output_as_3d; + bool _use_dummy_work_items; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H*/ diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp index 713d822f9b..ded4b29ae7 100644 --- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h new file mode 100644 index 0000000000..100100b1b1 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H +#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to multiply matrices when both the input matrices LHS (input0) and RHS (input1) have been reshaped + * + * @note The input matrices @p input0 and @p input1 must be reshaped through @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel + */ +class CLGEMMLowpMatrixMultiplyReshapedKernel : public ICLKernel +{ +public: + /** Default Constructor */ + CLGEMMLowpMatrixMultiplyReshapedKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMLowpMatrixMultiplyReshapedKernel(const CLGEMMLowpMatrixMultiplyReshapedKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMLowpMatrixMultiplyReshapedKernel &operator=(const CLGEMMLowpMatrixMultiplyReshapedKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMLowpMatrixMultiplyReshapedKernel(CLGEMMLowpMatrixMultiplyReshapedKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMLowpMatrixMultiplyReshapedKernel &operator=(CLGEMMLowpMatrixMultiplyReshapedKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4. + * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 + * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * lhs_info.transpose: false + * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same as lhs_info.k0 + * rhs_info.transpose: true + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + * + * @note lhs_info.k0 must be equal to rhs_info.k0 + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4. + * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 + * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. 
Only the following values are supported: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * lhs_info.transpose: false + * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same as lhs_info.k0 + * rhs_info.transpose: true + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + * + * @note lhs_info.k0 must be equal to rhs_info.k0 + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, + const GEMMReshapeInfo &gemm_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyReshapedKernel + * + * @param[in] input0 Input tensor info containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4. + * @param[in] input1 Input tensor info containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. + * @param[in] output Output tensor info. Data type supported: S32 + * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * lhs_info.transpose: false + * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: 2,3,4,8,16 + * rhs_info.transpose: true + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + * + * @note lhs_info.k0 must be equal to rhs_info.k0 + * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, + const GEMMReshapeInfo &gemm_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + ICLTensor *_output; + bool _slide_matrix_b; + bool _reinterpret_output_as_3d; + unsigned int _k; + bool _use_dummy_work_items; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H*/ diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp index 33fb903813..95aa30d14a 100644 --- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" @@ -195,11 +195,11 @@ std::pair validate_and_configure_window(ITensorInfo *input0, ITe { ARM_COMPUTE_UNUSED(vector_sum_row, vector_sum_col, output_multipliers, bias, output_shifts); - const GEMMLowpOutputStageInfo output_stage = gemm_info.output_stage; - unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0]; - unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1]; - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d; - bool reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d != 0); + const GEMMLowpOutputStageInfo output_stage = gemm_info.output_stage; + unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0]; + unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1]; + bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d; + bool reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d != 0); Window win{}; Window win_out{}; @@ -297,7 +297,7 @@ void CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::configure(const CLCompileCon output_multipliers != nullptr ? output_multipliers->info() : nullptr, output_shifts != nullptr ? output_shifts->info() : nullptr)); - auto padding_info = get_padding_info({ input0, input1, output, vector_sum_col, vector_sum_row, bias, output_multipliers, output_shifts }); + auto padding_info = get_padding_info({ input0, input1, output, vector_sum_col, vector_sum_row, bias, output_multipliers, output_shifts }); const GEMMRHSMatrixInfo rhs_info = gemm_info.rhs_info; const GEMMLHSMatrixInfo lhs_info = gemm_info.lhs_info; const GEMMLowpOutputStageInfo output_stage = gemm_info.output_stage; @@ -349,7 +349,7 @@ void CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::configure(const CLCompileCon // we will dispatch a batched-GEMM to reduce the complexity of the address calculation within the OpenCL kernel. // This means that the actual m used by the kernel is given by output->info()->dimension(1) and not by gemm_info.m const unsigned int internal_m = _reinterpret_output_as_3d ? gemm_info.m : output->info()->dimension(1); - // Calculate partial (store instead of load) M0 and partial N0 for the partial blocks at the end of a row/column if any. This is to avoid padding. + // Calculate partial (store instead of load) M0 and partial N0 for the partial blocks at the end of a row/column if any. This is to avoid padding. const unsigned int partial_store_m0 = internal_m % lhs_info.m0; const unsigned int partial_store_n0 = gemm_info.n % rhs_info.n0; diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h new file mode 100644 index 0000000000..222a8615e4 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H +#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H + +#include "arm_compute/core/KernelDescriptors.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to multiply matrices with QASYMM8 data type when only the input matrix RHS (input1) has been reshaped + * + * @note The input matrix input1 must be reshaped through @ref CLGEMMReshapeRHSMatrixKernel + * @note For fused output stage, only GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT type is supported + */ +class CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel : public ICLKernel +{ +public: + /** Default Constructor */ + CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel(const CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &operator=(const CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &operator=(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL + * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/S32. + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info. 
+ * Only the following values are supported for LHS info: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * Only the following values are supported for RHS info: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same as lhs_info.k0 + * rhs_info.transpose: true + * @param[in] vector_sum_col (Optional) Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32 + * @param[in] vector_sum_row (Optional) Input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32 + * @param[in] bias (Optional) Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32. + * @param[in] output_multipliers (Optional) Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32. + * @param[in] output_shifts (Optional) Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32. + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info, const ICLTensor *vector_sum_col = nullptr, + const ICLTensor *vector_sum_row = nullptr, const ICLTensor *bias = nullptr, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0 + * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/S32. + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info. + * Only the following values are supported for LHS info: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * Only the following values are supported for RHS info: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same as lhs_info.k0 + * rhs_info.transpose: true + * @param[in] vector_sum_col (Optional) Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32 + * @param[in] vector_sum_row (Optional) Input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32 + * @param[in] bias (Optional) Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32. + * @param[in] output_multipliers (Optional) Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32. + * @param[in] output_shifts (Optional) Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32. 
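+ *
+ * @note A minimal configuration sketch, illustrative only: `compile_context`, `lhs`, `rhs_reshaped`,
+ *       `dst` and `gemm_info` are assumed to have been prepared with the supported values listed
+ *       above (with `dst` in S32 when no output stage is fused).
+ * @code
+ * CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel mm;
+ * mm.configure(compile_context, &lhs, &rhs_reshaped, &dst, gemm_info);
+ * CLScheduler::get().enqueue(mm);
+ * @endcode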
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info, const ICLTensor *vector_sum_col = nullptr, + const ICLTensor *vector_sum_row = nullptr, const ICLTensor *bias = nullptr, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel + * + * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] input1 Input tensor info for the RHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL + * @param[in] output Output tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/S32. + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info. + * Only the following values are supported for LHS info: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * Only the following values are supported for RHS info: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same as lhs_info.k0 + * rhs_info.transpose: true + * @param[in] vector_sum_col (Optional) Input row-vector info of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32 + * @param[in] vector_sum_row (Optional) Input row-vector info of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32 + * @param[in] bias (Optional) Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32. + * @param[in] output_multipliers (Optional) Output multipliers tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32. + * @param[in] output_shifts (Optional) Output shifts tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32. 
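+ *
+ * @note Sketch of the output stage fields consumed when the output stage is fused, illustrative
+ *       only: `dst_offset`, `dst_multiplier` and `dst_shift` are assumed to come from the
+ *       destination quantization info.
+ * @code
+ * GEMMKernelInfo gemm_info{};
+ * gemm_info.output_stage.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
+ * gemm_info.output_stage.gemmlowp_offset     = dst_offset;
+ * gemm_info.output_stage.gemmlowp_multiplier = dst_multiplier;
+ * gemm_info.output_stage.gemmlowp_shift      = dst_shift;
+ * @endcode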
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, const GEMMKernelInfo &gemm_info, const ITensorInfo *vector_sum_col = nullptr, + const ITensorInfo *vector_sum_row = nullptr, const ITensorInfo *bias = nullptr, const ITensorInfo *output_multipliers = nullptr, + const ITensorInfo *output_shifts = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + ICLTensor *_output; + const ICLTensor *_vector_sum_col; + const ICLTensor *_vector_sum_row; + const ICLTensor *_bias; + const ICLTensor *_output_multipliers; + const ICLTensor *_output_shifts; + bool _slide_matrix_b; + bool _reinterpret_input_as_3d; + bool _reinterpret_output_as_3d; + bool _use_dummy_work_items; + bool _is_quantized_per_channel; + bool _fuse_output_stage; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H */ \ No newline at end of file diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp index aa4eea60ca..c7844b9c28 100644 --- a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h new file mode 100644 index 0000000000..f8705595a0 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H +#define ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel used to add the offset contribution after the matrix multiplication. 
The computation is performed in-place + * + * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), + * and adds to it the offset contribution of matrix A and matrix B in-place. + * + * The final result is: + * + * mm_result[i][k] = mm_result[i][k] + + * (vector_sum_col[k] * a_offset) + + * (vector_sum_row[i] * b_offset) + + * (a_offset * b_offset * k) + * + */ +class CLGEMMLowpOffsetContributionKernel : public ICLKernel +{ +public: + /** Constructor */ + CLGEMMLowpOffsetContributionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + CLGEMMLowpOffsetContributionKernel(const CLGEMMLowpOffsetContributionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + CLGEMMLowpOffsetContributionKernel &operator=(const CLGEMMLowpOffsetContributionKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMLowpOffsetContributionKernel(CLGEMMLowpOffsetContributionKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMLowpOffsetContributionKernel &operator=(CLGEMMLowpOffsetContributionKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in, out] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32 + * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result + * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] k Number of matrix A columns or Matrix B rows + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + */ + void configure(ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset, int32_t b_offset); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32 + * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result + * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] k Number of matrix A columns or Matrix B rows + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. 
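+ *
+ * @note A minimal in-place usage sketch, illustrative only: `mm_result`, `sum_col` and `sum_row` are
+ *       assumed to be the S32 outputs of the matrix multiplication and of the column/row reduction
+ *       kernels, with `k`, `a_offset` and `b_offset` taken from the original GEMMLowp inputs.
+ * @code
+ * CLGEMMLowpOffsetContributionKernel offset_contrib;
+ * offset_contrib.configure(&mm_result, &sum_col, &sum_row, nullptr, k, a_offset, b_offset);
+ * CLScheduler::get().enqueue(offset_contrib);
+ * @endcode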
+ */ + void configure(const CLCompileContext &compile_context, ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset, + int32_t b_offset); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel + * + * @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpOffsetContributionKernel. Data type supported: S32 + * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result + * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + * + * @return a status + */ + static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, int32_t a_offset, int32_t b_offset); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_vector_sum_col; + const ICLTensor *_vector_sum_row; + ICLTensor *_mm_result; + const ICLTensor *_bias; +}; +} // namespace arm_compute + +#endif /* ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H */ diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp index afa7bdbfdf..b41d8704bd 100644 --- a/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h new file mode 100644 index 0000000000..15f54d17a5 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H +#define ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel used to add the offset contribution after the matrix multiplication and perform the output stage. + * + * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), adds to it the offset contribution + * of matrix A and matrix B and performs the output stage defined by the output_stage argument + * + * @note For quantized computations the output data type for auto-initialization must be passed as part of the @ref GEMMLowpOutputStageInfo. + */ +class CLGEMMLowpOffsetContributionOutputStageKernel : public ICLKernel +{ +public: + /** Constructor */ + CLGEMMLowpOffsetContributionOutputStageKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + CLGEMMLowpOffsetContributionOutputStageKernel(const CLGEMMLowpOffsetContributionOutputStageKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + CLGEMMLowpOffsetContributionOutputStageKernel &operator=(const CLGEMMLowpOffsetContributionOutputStageKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMLowpOffsetContributionOutputStageKernel(CLGEMMLowpOffsetContributionOutputStageKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMLowpOffsetContributionOutputStageKernel &operator=(CLGEMMLowpOffsetContributionOutputStageKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32 + * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result + * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED. + * @param[in] k Number of matrix A columns or Matrix B rows + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + * @param[in] output_stage GEMMLowp output stage info + * @param[in] output_multipliers Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32 + * @param[in] output_shifts Output shifts tensor. 
In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32 + */ + void configure(const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output, int32_t k, int32_t a_offset, int32_t b_offset, + const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32 + * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result + * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED. + * @param[in] k Number of matrix A columns or Matrix B rows + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + * @param[in] output_stage GEMMLowp output stage info + * @param[in] output_multipliers Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32 + * @param[in] output_shifts Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32 + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output, + int32_t k, + int32_t a_offset, int32_t b_offset, + const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel + * + * @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpOffsetContributionKernel. Data type supported: S32 + * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result + * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED. + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. 
+ * @param[in] output_stage GEMMLowp output stage info + * @param[in] output_multipliers Output multipliers tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32 + * @param[in] output_shifts Output shifts tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32 + * + * @return a status + */ + static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, const ITensorInfo *output, int32_t a_offset, + int32_t b_offset, const GEMMLowpOutputStageInfo &output_stage, const ITensorInfo *output_multipliers, const ITensorInfo *output_shifts); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_mm_result; + const ICLTensor *_vector_sum_col; + const ICLTensor *_vector_sum_row; + const ICLTensor *_bias; + ICLTensor *_output; + const ICLTensor *_output_multipliers; + const ICLTensor *_output_shifts; + bool _is_quantized_per_channel; +}; +} // namespace arm_compute + +#endif /* ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H */ diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp index ab1b5a2203..d0f016879e 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h new file mode 100644 index 0000000000..8653102cd8 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
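A scalar model of the fused CLGEMMLowpOffsetContributionOutputStageKernel documented above: offset contribution as before, optional bias, then requantization, where per-channel quantization simply selects one multiplier/shift per output feature map. This is only a sketch with illustrative names; it assumes the multiplier encodes the real scale times 2^31 with a non-negative shift, and the double-based scaling stands in for the real output stage (sketched with the quantize-down kernels further on).

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

// One output element in channel 'ofm' (hypothetical scalar model, not the OpenCL kernel).
uint8_t offset_contribution_output_stage(int32_t mm_result, int32_t sum_col, int32_t sum_row,
                                         int32_t k, int32_t a_offset, int32_t b_offset,
                                         const int32_t *bias, std::size_t ofm, bool per_channel,
                                         const std::vector<int32_t> &output_multipliers,
                                         const std::vector<int32_t> &output_shifts,
                                         int32_t output_offset)
{
    int32_t acc = mm_result + sum_col * a_offset + sum_row * b_offset + a_offset * b_offset * k;
    if(bias != nullptr)
    {
        acc += bias[ofm];
    }
    // Per-channel quantization: one multiplier/shift per OFM, otherwise a single pair at index 0.
    const std::size_t idx   = per_channel ? ofm : 0;
    const double      scale = std::ldexp(static_cast<double>(output_multipliers[idx]), -31 - output_shifts[idx]);
    const int32_t     q     = static_cast<int32_t>(std::lround(acc * scale)) + output_offset;
    return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}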
+ */ +#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFIXEDPOINTKERNEL_H +#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFIXEDPOINTKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED/QSYMM16 + * + * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final quantized value. + * The following computations will be performed by the kernel: + * + * -# Compute fixed point multiplication between each entry of input by gemmlowp_multiplier + * -# Add bias to final result if bias tensor is not a nullptr + * -# Round to nearest division by a power-of-two using result_shift + * -# Add offset to each result + * -# Clamp the value between the specified min and max bounds + * -# Clamp the resulting int32 values to the proper quantized range and cast to QASYMM8/QASYMM8_SIGNED/QSYMM16. + */ +class CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel : public ICLKernel +{ +public: + /** Constructor */ + CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel(const CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &operator=(const CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel(CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &operator=(CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM16. + * @param[in] info Output stage info. Used to pass the quantized output data type + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] output Output tensor. Data type supported: Data type supported: QSYMM8/QASYMM8_SIGNED/QSYMM16. + * @param[in] info Output stage info. 
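For clarity, a scalar sketch of the sequence listed above using gemmlowp-style reference arithmetic. The bias is applied to the raw accumulator here, which is the usual convention even though the list orders the steps slightly differently; the exact rounding of the OpenCL kernel is not guaranteed to match this sketch, and the QASYMM8 clamp at the end is just one of the supported output types.

#include <algorithm>
#include <cstdint>
#include <limits>

// Rounding division by 2^exponent, rounding half away from zero (gemmlowp-style reference).
int32_t rounding_divide_by_pow2(int32_t x, int exponent)
{
    const int32_t mask      = (int32_t(1) << exponent) - 1;
    const int32_t remainder = x & mask;
    const int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
    return (x >> exponent) + (remainder > threshold ? 1 : 0);
}

// Saturating rounding doubling high multiply: high 32 bits of 2*a*b with rounding.
int32_t sat_rounding_doubling_high_mul(int32_t a, int32_t b)
{
    if(a == std::numeric_limits<int32_t>::min() && b == std::numeric_limits<int32_t>::min())
    {
        return std::numeric_limits<int32_t>::max();
    }
    const int64_t ab    = static_cast<int64_t>(a) * static_cast<int64_t>(b);
    const int64_t nudge = (ab >= 0) ? (int64_t(1) << 30) : (1 - (int64_t(1) << 30));
    return static_cast<int32_t>((ab + nudge) / (int64_t(1) << 31));
}

// One int32 accumulator -> quantized value: bias, fixed-point multiplier, shift, offset, clamp.
uint8_t quantize_down_fixedpoint(int32_t acc, int32_t bias, int32_t gemmlowp_multiplier, int32_t result_shift,
                                 int32_t result_offset, int32_t min_bound, int32_t max_bound)
{
    acc += bias;
    acc = sat_rounding_doubling_high_mul(acc, gemmlowp_multiplier);
    acc = rounding_divide_by_pow2(acc, result_shift);
    acc += result_offset;
    acc = std::max(min_bound, std::min(max_bound, acc));
    return static_cast<uint8_t>(std::max(0, std::min(255, acc)));
}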
Used to pass the quantized output data type + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_bias; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFIXEDPOINTKERNEL_H */ diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp index ad5bac015b..1d29dfe4b3 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h new file mode 100644 index 0000000000..0a8d5e1942 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H +#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ICLTensor; + +/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED + * + * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value. 
+ * The following computations will be performed by the kernel: + * + * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier + * -# Add bias to final result if bias tensor is not a nullptr + * -# Requantize + * -# Add offset to each result + * -# Clamp the value between the specified min and max bounds + * -# Clamp the resulting int32 values to + * - to the [0..255] range and cast to QASYMM8. + * - to the [-128..127] range and cast to QASYMM8_SIGNED. + */ +class CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel : public ICLKernel +{ +public: + /** Constructor */ + CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel(const CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &operator=(const CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel(CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &operator=(CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] info Output stage info. Used to pass the quantized output data type + */ + void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] info Output stage info. Used to pass the quantized output data type + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] info Output stage info. 
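The float variant follows the same outline, with a single floating-point multiplier in place of the fixed-point multiplier/shift pair. A minimal sketch with illustrative names, QASYMM8 output shown:

#include <algorithm>
#include <cmath>
#include <cstdint>

// One int32 accumulator quantized down with a floating-point multiplier (sketch only).
uint8_t quantize_down_float(int32_t acc, int32_t bias, float multiplier,
                            int32_t result_offset, int32_t min_bound, int32_t max_bound)
{
    acc += bias;
    int32_t scaled = static_cast<int32_t>(std::lround(acc * multiplier)) + result_offset;
    scaled = std::max(min_bound, std::min(max_bound, scaled));
    return static_cast<uint8_t>(std::max(0, std::min(255, scaled)));
}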
Used to pass the quantized output data type + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_bias; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H */ diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp index 8e4b291dbe..d32d328fc2 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h new file mode 100644 index 0000000000..abdf33ea43 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H +#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED + * + * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value. 
+ * The following computations will be performed by the kernel: + * + * -# Add offset terms to final result + * -# Multiply each entry of result by result_mult_int + * -# Add bias to final result if bias tensor is not a nullptr + * -# Shift the int32 accumulator by result_shift + * -# Clamp the value between the specified min and max bounds + * -# Clamp the resulting int32 values: + * -# -to the [0..255] range and cast to QASYMM8. + * -# -to the [-128..127] range and cast to QASYMM8_SIGNED. + * + */ +class CLGEMMLowpQuantizeDownInt32ScaleKernel : public ICLKernel +{ +public: + /** Constructor */ + CLGEMMLowpQuantizeDownInt32ScaleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + CLGEMMLowpQuantizeDownInt32ScaleKernel(const CLGEMMLowpQuantizeDownInt32ScaleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + CLGEMMLowpQuantizeDownInt32ScaleKernel &operator=(const CLGEMMLowpQuantizeDownInt32ScaleKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMLowpQuantizeDownInt32ScaleKernel(CLGEMMLowpQuantizeDownInt32ScaleKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMLowpQuantizeDownInt32ScaleKernel &operator=(CLGEMMLowpQuantizeDownInt32ScaleKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] output_stage GEMMLowp output stage metadata. + */ + void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] output_stage GEMMLowp output stage metadata. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleKernel + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] output_stage GEMMLowp output stage metadata. 
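And the pure integer variant, again as an illustrative scalar model following the step list above; the kernel's exact rounding during the shift is not reproduced here.

#include <algorithm>
#include <cstdint>

// One int32 accumulator through the integer output stage (sketch, QASYMM8 clamp shown).
uint8_t quantize_down_int(int32_t acc, int32_t bias, int32_t result_offset,
                          int32_t result_mult_int, int32_t result_shift,
                          int32_t min_bound, int32_t max_bound)
{
    int32_t scaled = (acc + result_offset) * result_mult_int; // add offset terms, then multiply
    scaled += bias;                                           // bias, if present, per the list above
    scaled >>= result_shift;                                  // truncating shift; exact rounding not modelled
    scaled = std::max(min_bound, std::min(max_bound, scaled));
    return static_cast<uint8_t>(std::max(0, std::min(255, scaled)));
}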
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_bias; + ICLTensor *_output; +}; +} // namespace arm_compute + +#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H */ \ No newline at end of file diff --git a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp index 339049ff9a..d508bf6f21 100644 --- a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLGEMMLowpReductionKernel.h b/src/core/CL/kernels/CLGEMMLowpReductionKernel.h new file mode 100644 index 0000000000..237d8099b7 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMLowpReductionKernel.h @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H +#define ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; +struct GEMMLowpReductionKernelInfo; + +/** Common interface for all OpenCL reduction kernels */ +class ICLGEMMLowpReductionKernel : public ICLKernel +{ +public: + /** Constructor */ + ICLGEMMLowpReductionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + ICLGEMMLowpReductionKernel(const ICLGEMMLowpReductionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + ICLGEMMLowpReductionKernel &operator=(const ICLGEMMLowpReductionKernel &) = delete; + /** Allow instances of this class to be moved */ + ICLGEMMLowpReductionKernel(ICLGEMMLowpReductionKernel &&) = default; + /** Allow instances of this class to be moved */ + ICLGEMMLowpReductionKernel &operator=(ICLGEMMLowpReductionKernel &&) = default; + + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8. + * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k Number of matrix columns/rows depending on the type of reduction. + * - is_reshaped True if the matrix has been reshaped. + * - scalar Scalar value to multiply each reduced column/row by. + * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. + */ + virtual void configure(const ICLTensor *input, ICLTensor *output, const GEMMLowpReductionKernelInfo &info) = 0; + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8. + * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k Number of matrix columns/rows depending on the type of reduction. + * - is_reshaped True if the matrix has been reshaped. + * - scalar Scalar value to multiply each reduced column/row by. + * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. + */ + virtual void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLowpReductionKernelInfo &info) = 0; + +protected: + const ICLTensor *_input; + ICLTensor *_output; +}; + +/** OpenCL kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A. + * + * @note This stage is needed to handle the offset of matrix product + * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md + */ +class CLGEMMLowpMatrixAReductionKernel : public ICLGEMMLowpReductionKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8. + * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k Number of matrix columns/rows depending on the type of reduction. + * - is_reshaped True if the matrix has been reshaped. + * - scalar Scalar value to multiply each reduced column/row by. + * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. 
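A plain C++ model of what the matrix A reduction produces (the matrix B variant sums columns instead of rows), assuming a simple row-major layout and mirroring the k/scalar/mul_byscalar fields described above; names are illustrative only:

#include <cstdint>
#include <vector>

// Row sums of a row-major M x K quantized matrix, optionally multiplied by a scalar.
std::vector<int32_t> reduce_rows(const std::vector<uint8_t> &mtx_a, int32_t m, int32_t k,
                                 int32_t scalar, bool mul_by_scalar)
{
    std::vector<int32_t> vector_sum_row(m, 0);
    for(int32_t i = 0; i < m; ++i)
    {
        for(int32_t j = 0; j < k; ++j)
        {
            vector_sum_row[i] += mtx_a[i * k + j];
        }
        if(mul_by_scalar)
        {
            vector_sum_row[i] *= scalar;
        }
    }
    return vector_sum_row;
}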
+ */ + void configure(const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override; + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8. + * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k Number of matrix columns/rows depending on the type of reduction. + * - is_reshaped True if the matrix has been reshaped. + * - scalar Scalar value to multiply each reduced column/row by. + * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override; + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixAReductionKernel + * + * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8. + * @param[in] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k Number of matrix columns/rows depending on the type of reduction. + * - is_reshaped True if the matrix has been reshaped. + * - scalar Scalar value to multiply each reduced column/row by. + * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. + * + * @return a status + */ + static Status validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row, const GEMMLowpReductionKernelInfo &info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; + +/** OpenCL kernel used to compute the row-vectors of sums of all the entries in each column of Matrix B. + * + * @note This stage is needed to handle the offset of matrix product + * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md + */ +class CLGEMMLowpMatrixBReductionKernel : public ICLGEMMLowpReductionKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL. + * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k Number of matrix columns/rows depending on the type of reduction. + * - is_reshaped True if the matrix has been reshaped. + * - scalar Scalar value to multiply each reduced column/row by. + * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. + */ + void configure(const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override; + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL. + * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k Number of matrix columns/rows depending on the type of reduction. + * - is_reshaped True if the matrix has been reshaped. 
+ * - scalar Scalar value to multiply each reduced column/row by. + * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override; + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixBReductionKernel + * + * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL. + * @param[in] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k Number of matrix columns/rows depending on the type of reduction. + * - is_reshaped True if the matrix has been reshaped. + * - scalar Scalar value to multiply each reduced column/row by. + * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. + * + * @return a status + */ + static Status validate(const ITensorInfo *mtx_b, const ITensorInfo *vector_sum_col, const GEMMLowpReductionKernelInfo &info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H */ diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp index fd0978230d..b0d08a756c 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h new file mode 100644 index 0000000000..71d223b8ac --- /dev/null +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H +#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to multiply two input matrices "A" and "B" and add a matrix "C" if provided. All elements of the output matrix will be multiplied by alpha. In case matrix C is passed, it will be added to the previous result. + * For the matrix C, the broadcast addition is supported if the flag "broadcast_bias" is set in the GEMMReshapeInfo object + * + * @note If the input tensors @p input0 and @p input1 have been reshaped respectively with @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel, + * the flag @p is_interleaved_transposed must be set to true + * + * @attention @p input1 tensor must have at least 2 dimensions (matrix) + * + */ +class CLGEMMMatrixMultiplyKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLGEMMMatrixMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixMultiplyKernel(const CLGEMMMatrixMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixMultiplyKernel &operator=(const CLGEMMMatrixMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMMatrixMultiplyKernel(CLGEMMMatrixMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMMatrixMultiplyKernel &operator=(CLGEMMMatrixMultiplyKernel &&) = default; + /** Initialise the kernel's input, output and alpha + * + * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F16/F32 + * @param[in] input1 Input tensor containing the Matrix B. Data type supported: same as @p input0 + * @param[in] input2 Input tensor containing the Matrix C (bias). Can be nullptr. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta (Optional) Weight of vector C. Default value is 0. Only beta = 1 is currently supported. + * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel + * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped + * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy + * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication + * + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f, + bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo()); + /** Initialise the kernel's input, output and alpha + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F16/F32 + * @param[in] input1 Input tensor containing the Matrix B.
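As a reference for what the kernel computes in the simplest case (no reshaping, no activation; names illustrative), a naive scalar model of output = alpha * A * B + beta * C, with C optionally broadcast along the rows:

#include <cstddef>
#include <vector>

// Naive model: A is M x K, B is K x N, C is either M x N or a broadcast row of size N.
std::vector<float> gemm_reference(const std::vector<float> &a, const std::vector<float> &b,
                                  const std::vector<float> &c, std::size_t m, std::size_t n, std::size_t k,
                                  float alpha, float beta, bool broadcast_bias)
{
    std::vector<float> out(m * n, 0.0f);
    for(std::size_t i = 0; i < m; ++i)
    {
        for(std::size_t j = 0; j < n; ++j)
        {
            float acc = 0.0f;
            for(std::size_t p = 0; p < k; ++p)
            {
                acc += a[i * k + p] * b[p * n + j];
            }
            const float bias = c.empty() ? 0.0f : (broadcast_bias ? c[j] : c[i * n + j]);
            out[i * n + j] = alpha * acc + beta * bias;
        }
    }
    return out;
}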
Data type supported: same as @p input0 + * @param[in] input2 Input tensor containing the Matrix C (bias). Can be nullptr. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta (Optional) Weight of vector C. Default value is 0. Only beta = 1 is currently supported. + * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel + * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped + * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy + * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication + * + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f, + bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyKernel + * + * @param[in] input0 Input tensor containing the Matrix A info. Data types supported: F16/F32 + * @param[in] input1 Input tensor containing the Matrix B info. Data type supported: same as @p input0 + * @param[in] input2 Input tensor containing the Matrix C (bias) info. Can be nullptr. Data type supported: same as @p input0 + * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of vector C. Default value is 0. Only beta = 1 is currently supported. + * @param[in] is_interleaved_transposed True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel + * @param[in] reshape_info GEMM reshape info. 
If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped + * @param[in] gpu_target GPU Target + * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy + * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication + * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, + bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info, GPUTarget gpu_target, bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +public: + const ICLTensor *_input0; + const ICLTensor *_input1; + const ICLTensor *_input2; + ICLTensor *_output; + bool _slide_matrix_b; + bool _reinterpret_input_as_3d; + bool _reinterpret_output_as_3d; + bool _add_bias; + bool _broadcast_bias; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H */ diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp index cea147b10c..f613937f54 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h new file mode 100644 index 0000000000..6b6004b464 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H +#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include "arm_compute/core/KernelDescriptors.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to multiply matrices when neither of the input matrices have been reshaped */ +class CLGEMMMatrixMultiplyNativeKernel : public ICLKernel +{ +public: + /** Default Constructor */ + CLGEMMMatrixMultiplyNativeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixMultiplyNativeKernel(const CLGEMMMatrixMultiplyNativeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixMultiplyNativeKernel &operator=(const CLGEMMMatrixMultiplyNativeKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMMatrixMultiplyNativeKernel(CLGEMMMatrixMultiplyNativeKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMMatrixMultiplyNativeKernel &operator=(CLGEMMMatrixMultiplyNativeKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input0 Input tensor for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4. + * @param[in] input1 Input tensor for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. + * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. + * @param[out] output Output tensor info. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of the matrix bias + * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. Only the following values are supported: + * lhs_info.m0: 1,2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same of lhs_info.k0 + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMKernelInfo &gemm_info); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 Input tensor for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4. + * @param[in] input1 Input tensor for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. + * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. + * @param[out] output Output tensor info. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of the matrix bias + * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. 
Only the following values are supported: + * lhs_info.m0: 1,2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same of lhs_info.k0 + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, + const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMKernelInfo &gemm_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyNativeKernel + * + * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4. + * @param[in] input1 Input tensor info for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. + * @param[in] input2 Input tensor info containing the bias matrix. Data type supported: same as @p input0. + * @param[in] output Output tensor info. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of the matrix bias + * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. Only the following values are supported: + * lhs_info.m0: 1,2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same of lhs_info.k0 + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMKernelInfo &gemm_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + const ICLTensor *_input2; + ICLTensor *_output; + bool _slide_matrix_b; + bool _reinterpret_input_as_3d; + bool _reinterpret_output_as_3d; + bool _use_dummy_work_items; + bool _add_bias; + bool _broadcast_bias; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H*/ diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp index eaf57086a3..fb15b42fe2 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
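To make the lhs_info.m0 / rhs_info.n0 / k0 parameters of the native kernel above concrete: they set the tile of output rows and columns produced per work item and the number of accumulations processed per iteration. A rough CPU-side sketch of the same partitioning, where each (i0, j0) tile corresponds to one work item in the OpenCL kernel; names are illustrative and out must be zero-initialised by the caller:

#include <algorithm>
#include <cstddef>
#include <vector>

// Blocked GEMM: each m0 x n0 tile of the output is accumulated k0 elements at a time.
void gemm_blocked(const std::vector<float> &a, const std::vector<float> &b, std::vector<float> &out,
                  std::size_t m, std::size_t n, std::size_t k, std::size_t m0, std::size_t n0, std::size_t k0)
{
    for(std::size_t i0 = 0; i0 < m; i0 += m0)
    {
        for(std::size_t j0 = 0; j0 < n; j0 += n0)
        {
            for(std::size_t p0 = 0; p0 < k; p0 += k0)
            {
                for(std::size_t i = i0; i < std::min(i0 + m0, m); ++i)
                {
                    for(std::size_t j = j0; j < std::min(j0 + n0, n); ++j)
                    {
                        for(std::size_t p = p0; p < std::min(p0 + k0, k); ++p)
                        {
                            out[i * n + j] += a[i * k + p] * b[p * n + j];
                        }
                    }
                }
            }
        }
    }
}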
*/ -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h new file mode 100644 index 0000000000..2ffc322def --- /dev/null +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H +#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include "arm_compute/core/KernelDescriptors.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to multiply matrices when both the input matrices LHS (input0) and RHS (input1) have been reshaped + * + * @note The input matrices @p input0 and @p input1 must be reshaped through @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel + */ +class CLGEMMMatrixMultiplyReshapedKernel : public ICLKernel +{ +public: + /** Default Constructor */ + CLGEMMMatrixMultiplyReshapedKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixMultiplyReshapedKernel(const CLGEMMMatrixMultiplyReshapedKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixMultiplyReshapedKernel &operator=(const CLGEMMMatrixMultiplyReshapedKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMMatrixMultiplyReshapedKernel(CLGEMMMatrixMultiplyReshapedKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMMatrixMultiplyReshapedKernel &operator=(CLGEMMMatrixMultiplyReshapedKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag. + * Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the + * multiplications. i.e. float c = (half)a * (half)b + * + * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function. 
+ * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer, + * the following conditions are required: + * -# rhs_info.n0 can only be 4, 8 and 16 + * -# rhs_info.k0 can only be 4, 8 and 16 + * -# Data type can only be F32 + * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension + * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement + * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) + * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT + * + * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). The number of dimensions for the LHS matrix must be less or equal than 4 + * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3 + * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of the matrix bias + * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * lhs_info.transpose: false + * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true) + * rhs_info.k0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true) + * rhs_info.transpose: true + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + * + * @note lhs_info.k0 must be equal to rhs_info.k0 + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMKernelInfo &gemm_info); + /** Initialise the kernel's input and output. + * + * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag. + * Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the + * multiplications. i.e. float c = (half)a * (half)b + * + * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function. + * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer, + * the following conditions are required: + * -# rhs_info.n0 can only be 4, 8 and 16 + * -# rhs_info.k0 can only be 4, 8 and 16 + * -# Data type can only be F32 + * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension + * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement + * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) + * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT + * + * @param[in] compile_context The compile context to be used. 
+ * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). The number of dimensions for the LHS matrix must be less or equal than 4 + * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3 + * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of the matrix bias + * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * lhs_info.transpose: false + * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true) + * rhs_info.k0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true) + * rhs_info.transpose: true + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + * + * @note lhs_info.k0 must be equal to rhs_info.k0 + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, + const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMKernelInfo &gemm_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedKernel + * + * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag. + * Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the + * multiplications. i.e. float c = (half)a * (half)b + * + * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function. + * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer, + * the following conditions are required: + * -# rhs_info.n0 can only be 4, 8 and 16 + * -# rhs_info.k0 can only be 4, 8 and 16 + * -# Data type can only be F32 + * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension + * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement + * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) + * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT + * + * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). The number of dimensions for the LHS matrix must be less or equal than 4 + * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3 + * @param[in] input2 Input tensor info containing the bias matrix. Data type supported: same as @p input0. + * @param[in] output Output tensor to store the result of matrix multiplication. 
Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of the matrix bias + * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * lhs_info.transpose: false + * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true) + * rhs_info.k0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true) + * rhs_info.transpose: true + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + * + * @note lhs_info.k0 must be equal to rhs_info.k0 + * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMKernelInfo &gemm_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + const ICLTensor *_input2; + ICLTensor *_output; + bool _slide_matrix_b; + bool _reinterpret_output_as_3d; + bool _use_dummy_work_items; + bool _add_bias; + bool _broadcast_bias; + bool _export_to_cl_image; + unsigned int _k; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H*/ \ No newline at end of file diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp index d53aede3c8..1f296f8e26 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" @@ -356,10 +356,10 @@ void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::run(const Window &window, cl::Co ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0); } - const size_t lhs_idx_batch_size = _reinterpret_input_as_3d && !_has_pad_y? 3u : 2u; + const size_t lhs_idx_batch_size = _reinterpret_input_as_3d && !_has_pad_y ? 3u : 2u; const size_t rhs_idx_batch_size = 2u; const size_t bia_idx_batch_size = 2u; - const size_t out_idx_batch_size = _reinterpret_output_as_3d && !_has_pad_y? 3u : 2u; + const size_t out_idx_batch_size = _reinterpret_output_as_3d && !_has_pad_y ? 3u : 2u; Window slice = window.first_slice_window_3D(); Window slice_matrix_b = slice; diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h new file mode 100644 index 0000000000..5b96679a46 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
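Editor's illustration (not part of this patch): a minimal sketch of block-size descriptors that satisfy the export_to_cl_image restrictions listed above for CLGEMMMatrixMultiplyReshapedKernel, namely n0 and k0 limited to 4, 8 or 16, F32 data, lhs_info.transpose = false, rhs_info.transpose = true and matching k0 values. The concrete numbers and the helper function names are assumptions. The same lhs_info and rhs_info also have to be handed to CLGEMMReshapeLHSMatrixKernel and CLGEMMReshapeRHSMatrixKernel so that the reshaped layouts match what this kernel expects.

#include "arm_compute/core/Types.h"

// Hypothetical helpers returning block-size descriptors that respect the constraints
// documented for CLGEMMMatrixMultiplyReshapedKernel when rhs_info.export_to_cl_image = true.
arm_compute::GEMMLHSMatrixInfo make_reshaped_lhs_info()
{
    arm_compute::GEMMLHSMatrixInfo lhs_info{};
    lhs_info.m0         = 4;     // supported: 2..8
    lhs_info.k0         = 4;     // must be equal to rhs_info.k0
    lhs_info.v0         = 4;     // used by CLGEMMReshapeLHSMatrixKernel, must be > 0
    lhs_info.transpose  = false; // required by this kernel
    lhs_info.interleave = true;
    return lhs_info;
}

arm_compute::GEMMRHSMatrixInfo make_reshaped_rhs_info()
{
    arm_compute::GEMMRHSMatrixInfo rhs_info{};
    rhs_info.n0                 = 4;    // only 4, 8 or 16 when export_to_cl_image = true
    rhs_info.k0                 = 4;    // only 4, 8 or 16 when export_to_cl_image = true
    rhs_info.h0                 = 4;    // used by CLGEMMReshapeRHSMatrixKernel, must be > 0
    rhs_info.transpose          = true; // required by this kernel
    rhs_info.interleave         = true;
    rhs_info.export_to_cl_image = true; // needs cl_khr_image2d_from_buffer and F32 data
    return rhs_info;
}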
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H +#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include "arm_compute/core/KernelDescriptors.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to multiply matrices when only the input matrix RHS (input1) has been reshaped + * + * @note The input matrix input1 must be reshaped through @ref CLGEMMReshapeRHSMatrixKernel + */ +class CLGEMMMatrixMultiplyReshapedOnlyRHSKernel : public ICLKernel +{ +public: + /** Default Constructor */ + CLGEMMMatrixMultiplyReshapedOnlyRHSKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixMultiplyReshapedOnlyRHSKernel(const CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &operator=(const CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMMatrixMultiplyReshapedOnlyRHSKernel(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &operator=(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function. + * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer, + * the following conditions are required: + * -# rhs_info.n0 can only be 4, 8 and 16 + * -# rhs_info.k0 can only be 4, 8 and 16 + * -# Data type can only be F32 + * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension + * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement + * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) + * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT + * + * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). + * The number of dimensions for the LHS matrix must be less or equal than 4. 
+ * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. + * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of the matrix bias + * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported: + * lhs_info.m0: 1,2,3,4,5,6,7,8 + * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: + * rhs_info.k0: 2,3,4,8,16 + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.transpose: true,false + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMKernelInfo &gemm_info); + /** Initialise the kernel's input and output. + * + * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function. + * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer, + * the following conditions are required: + * -# rhs_info.n0 can only be 4, 8 and 16 + * -# rhs_info.k0 can only be 4, 8 and 16 + * -# Data type can only be F32 + * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension + * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement + * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) + * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). + * The number of dimensions for the LHS matrix must be less or equal than 4. + * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. + * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of the matrix bias + * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported: + * lhs_info.m0: 1,2,3,4,5,6,7,8 + * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. 
Only the following values are supported: + * rhs_info.k0: 2,3,4,8,16 + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.transpose: true,false + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, + const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMKernelInfo &gemm_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel + * + * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function. + * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer, + * the following conditions are required: + * -# rhs_info.n0 can only be 4, 8 and 16 + * -# rhs_info.k0 can only be 4, 8 and 16 + * -# Data type can only be F32 + * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension + * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement + * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) + * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT + * + * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). + * The number of dimensions for the LHS matrix must be less or equal than 4. + * @param[in] input1 Input tensor info for the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. + * @param[in] input2 Input tensor info containing the bias matrix. Data type supported: same as @p input0. + * @param[in] output Output tensor info. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of the matrix bias + * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported: + * lhs_info.m0: 1,2,3,4,5,6,7,8 + * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. 
Only the following values are supported: + * rhs_info.k0: 2,3,4,8,16 + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.transpose: true,false + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMKernelInfo &gemm_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + const ICLTensor *_input2; + ICLTensor *_output; + bool _slide_matrix_b; + bool _reinterpret_input_as_3d; + bool _reinterpret_output_as_3d; + bool _use_dummy_work_items; + bool _add_bias; + bool _broadcast_bias; + bool _export_to_cl_image; + bool _has_pad_y; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H*/ diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp index 04aa061e98..ee0abc56d3 100644 --- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h new file mode 100644 index 0000000000..bef8c231ac --- /dev/null +++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H +#define ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the GEMM matrix vector multiply kernel. 
**/ +class CLGEMMMatrixVectorMultiplyKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLGEMMMatrixVectorMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixVectorMultiplyKernel(const CLGEMMMatrixVectorMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixVectorMultiplyKernel &operator=(const CLGEMMMatrixVectorMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMMatrixVectorMultiplyKernel(CLGEMMMatrixVectorMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMMatrixVectorMultiplyKernel &operator=(CLGEMMMatrixVectorMultiplyKernel &&) = default; + /** Set the input and output of the kernel. + * + * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input. + * @param[out] output The output 2D tensor. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED. + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); + /** Set the input and output of the kernel. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input. + * @param[out] output The output 2D tensor. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixVectorMultiplyKernel + * + * @param[in] input0 The reshaped input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in] input1 The 2D reshaped weights tensor info. Data type supported: Same as @p input. + * @param[in] output The output 2D tensor info. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED. + * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + ICLTensor *_output; + int _num_rows_read_per_iteration; + BorderSize _border_size; +}; +} // arm_compute +#endif /*ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H */ diff --git a/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp b/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp index f2ad677976..3e2fc79704 100644 --- a/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp +++ b/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
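Editor's illustration (not part of this patch): a sketch of the two-kernel usage pattern behind the CLGEMMMatrixMultiplyReshapedOnlyRHSKernel documented above, in which only the RHS matrix is reshaped by CLGEMMReshapeRHSMatrixKernel and the same GEMMRHSMatrixInfo is then handed to the matrix-multiply kernel. The tensor sizes and block sizes, the assumption that the reshape kernel auto-initialises its output, the assumption that a null bias is accepted with beta = 0, and building within the library tree (these headers now live under src/) are all the editor's.

#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"

int main()
{
    using namespace arm_compute;
    CLScheduler::get().default_init(); // create context, queue and kernel library

    constexpr unsigned int M = 24, N = 16, K = 8;

    CLTensor lhs, rhs, rhs_reshaped, dst;
    lhs.allocator()->init(TensorInfo(TensorShape(K, M), 1, DataType::F32));
    rhs.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32));
    // rhs_reshaped is left uninitialised: the reshape kernel is assumed to auto-initialise it.

    GEMMLHSMatrixInfo lhs_info{};
    lhs_info.m0 = 4; // supported: 1..8 (the LHS is not reshaped by this kernel)
    lhs_info.k0 = 4;

    GEMMRHSMatrixInfo rhs_info{};
    rhs_info.n0        = 4;
    rhs_info.k0        = 4;
    rhs_info.h0        = 4;
    rhs_info.transpose = true;

    GEMMKernelInfo gemm_info{};
    gemm_info.m = M;
    gemm_info.n = N;
    gemm_info.k = K;

    CLGEMMReshapeRHSMatrixKernel              reshape_rhs;
    CLGEMMMatrixMultiplyReshapedOnlyRHSKernel mm;
    reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info);
    mm.configure(&lhs, &rhs_reshaped, nullptr, &dst, 1.f, 0.f, lhs_info, rhs_info, gemm_info);

    lhs.allocator()->allocate();
    rhs.allocator()->allocate();
    rhs_reshaped.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill lhs/rhs via map()/unmap() in a real run ...

    CLScheduler::get().enqueue(reshape_rhs);
    CLScheduler::get().enqueue(mm);
    CLScheduler::get().sync();
    return 0;
}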
*/ -#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h b/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h new file mode 100644 index 0000000000..92202a26fc --- /dev/null +++ b/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H +#define ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to reshape the LHS matrix when performing the matrix multiplication. + * In particular, this function splits the input matrix in blocks of size M0xK0 (defined through GEMMLHSInfo) and + * stores each one in the output matrix unrolling the values + */ +class CLGEMMReshapeLHSMatrixKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLGEMMReshapeLHSMatrixKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMReshapeLHSMatrixKernel(const CLGEMMReshapeLHSMatrixKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMReshapeLHSMatrixKernel &operator=(const CLGEMMReshapeLHSMatrixKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMReshapeLHSMatrixKernel(CLGEMMReshapeLHSMatrixKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMReshapeLHSMatrixKernel &operator=(CLGEMMReshapeLHSMatrixKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor. Data type supported: same as @p input + * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary + * information to reshape the input tensor. 
Only the following values are supported: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * lhs_info.v0: greater than 0 + * lhs_info.transpose: true, false + * lhs_info.interleave: true, false + * @param[in] reinterpret_input_as_3d (Optional) True if the input has to be reinterpreted as 3D tensor + */ + void configure(const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor. Data type supported: same as @p input + * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary + * information to reshape the input tensor. Only the following values are supported: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * lhs_info.v0: greater than 0 + * lhs_info.transpose: true, false + * lhs_info.interleave: true, false + * @param[in] reinterpret_input_as_3d (Optional) True if the input has to be reinterpreted as 3D tensor + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMReshapeLHSMatrixKernel + * + * @param[in] input Input tensor info. Data types supported: All + * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input. + * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary + * information to reshape the input tensor. Only the following values are supported: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * lhs_info.v0: greater than 0 + * lhs_info.transpose: true, false + * lhs_info.interleave: true, false + * @param[in] reinterpret_input_as_3d True if the input has to be reinterpreted as 3D tensor + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d); + + // Inherited methods overridden + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + bool _reinterpret_input_as_3d; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H */ \ No newline at end of file diff --git a/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp index d94e834d2c..33de61ed01 100644 --- a/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp +++ b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h new file mode 100644 index 0000000000..911484ea76 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
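Editor's illustration (not part of this patch): a minimal validate() sketch for the CLGEMMReshapeLHSMatrixKernel interface above. It assumes the library's usual convention that an output TensorInfo with no size set skips the output-shape comparison; the input shape, block sizes and the helper function name are the editor's choices.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"

// Sketch: check whether a (K, M) = (8, 24) F32 LHS matrix can be reshaped with the given block sizes.
arm_compute::Status check_lhs_reshape()
{
    using namespace arm_compute;
    const TensorInfo src(TensorShape(8U, 24U), 1, DataType::F32);
    TensorInfo       dst{}; // left empty: assumed to skip the output-shape check

    GEMMLHSMatrixInfo lhs_info{};
    lhs_info.m0         = 4;     // supported: 2..8
    lhs_info.k0         = 4;     // supported: 2,3,4,8,16
    lhs_info.v0         = 2;     // must be greater than 0
    lhs_info.transpose  = false; // true and false are both supported here
    lhs_info.interleave = true;

    return CLGEMMReshapeLHSMatrixKernel::validate(&src, &dst, lhs_info, /* reinterpret_input_as_3d */ false);
}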
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H +#define ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to reshape the RHS matrix when performing the matrix multiplication + * In particular, this kernel splits the input matrix in blocks of size K0xN0 and stores each one in + * the output matrix unrolling the values */ +class CLGEMMReshapeRHSMatrixKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLGEMMReshapeRHSMatrixKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMReshapeRHSMatrixKernel(const CLGEMMReshapeRHSMatrixKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMReshapeRHSMatrixKernel &operator=(const CLGEMMReshapeRHSMatrixKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMReshapeRHSMatrixKernel(CLGEMMReshapeRHSMatrixKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMReshapeRHSMatrixKernel &operator=(CLGEMMReshapeRHSMatrixKernel &&) = default; + /** Default destructor */ + ~CLGEMMReshapeRHSMatrixKernel() = default; + /** Initialise the kernel's input and output. + * + * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will guarantee the OpenCL pitch alignment for the output tensor, + * required to create a OpenCL image object from buffer in @ref CLGEMMMatrixMultiplyReshapedKernel and in @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel + * Since the OpenCL image object is created importing the OpenCL buffer, the following conditions are required: + * -# rhs_info.n0 can only be 4, 8 and 16 + * -# rhs_info.k0 can only be 4, 8 and 16 + * -# Data type can only be F32, F16 + * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension + * -# output width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) + * -# output (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT + * -# The output tensor should be only consumed by @ref CLGEMMMatrixMultiplyReshapedKernel or @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel + * + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor. 
Data type supported: same as @p input + * @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary + * information to reshape the input tensor. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image == true) + * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false), (only 4, 8 and 16 if rhs_info.export_to_cl_image == true) + * rhs_info.h0: greater than 0 + * rhs_info.transpose: true, false + * rhs_info.interleave: true, false + */ + void configure(const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info); + /** Initialise the kernel's input and output. + * + * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will guarantee the OpenCL pitch alignment for the output tensor, + * required to create a OpenCL image object from buffer in @ref CLGEMMMatrixMultiplyReshapedKernel and in @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel + * Since the OpenCL image object is created importing the OpenCL buffer, the following conditions are required: + * -# rhs_info.n0 can only be 4, 8 and 16 + * -# rhs_info.k0 can only be 4, 8 and 16 + * -# Data type can only be F32, F16 + * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension + * -# output width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) + * -# output (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT + * -# The output tensor should be only consumed by @ref CLGEMMMatrixMultiplyReshapedKernel or @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor. Data type supported: same as @p input + * @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary + * information to reshape the input tensor. 
Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image == true) + * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false), (only 4, 8 and 16 if rhs_info.export_to_cl_image == true) + * rhs_info.h0: greater than 0 + * rhs_info.transpose: true, false + * rhs_info.interleave: true, false + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMReshapeRHSMatrixKernel + * + * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will guarantee the OpenCL pitch alignment for the output tensor, + * required to create a OpenCL image object from buffer in @ref CLGEMMMatrixMultiplyReshapedKernel and in @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel + * Since the OpenCL image object is created importing the OpenCL buffer, the following conditions are required: + * -# rhs_info.n0 can only be 4, 8 and 16 + * -# rhs_info.k0 can only be 4, 8 and 16 + * -# Data type can only be F32, F16 + * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension + * -# output width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) + * -# output (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT + * -# The output tensor should be only consumed by @ref CLGEMMMatrixMultiplyReshapedKernel or @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel + * + * @param[in] input Input tensor info. Data types supported: All + * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input. + * @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary + * information to reshape the input tensor. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image == true) + * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false),(only 4, 8 and 16 if rhs_info.export_to_cl_image == true) + * rhs_info.h0: greater than 0 + * rhs_info.transpose: true, false + * rhs_info.interleave: true, false + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const GEMMRHSMatrixInfo &rhs_info); + + // Inherited methods overridden + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H */ \ No newline at end of file diff --git a/src/core/CL/kernels/CLGatherKernel.cpp b/src/core/CL/kernels/CLGatherKernel.cpp index a8508bed2d..9e802c20fb 100644 --- a/src/core/CL/kernels/CLGatherKernel.cpp +++ b/src/core/CL/kernels/CLGatherKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGatherKernel.h" +#include "src/core/CL/kernels/CLGatherKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/helpers/AutoConfiguration.h" diff --git a/src/core/CL/kernels/CLGatherKernel.h b/src/core/CL/kernels/CLGatherKernel.h new file mode 100644 index 0000000000..8f472a4696 --- /dev/null +++ b/src/core/CL/kernels/CLGatherKernel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGATHERKERNEL_H +#define ARM_COMPUTE_CLGATHERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to perform tensor reshaping */ +class CLGatherKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLGatherKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGatherKernel(const CLGatherKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGatherKernel &operator=(const CLGatherKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGatherKernel(CLGatherKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGatherKernel &operator=(CLGatherKernel &&) = default; + /** Default destructor */ + ~CLGatherKernel() = default; + /** Initialise the kernel's inputs and outputs + * + * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All. + * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]) + * @param[out] output Destination tensor. Data type supported: Same as @p input + * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 + */ + void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0); + /** Initialise the kernel's inputs and outputs + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All. + * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]) + * @param[out] output Destination tensor. Data type supported: Same as @p input + * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. 
Defaults to 0 + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0); + + /** Static function to check if given info will lead to a valid configuration of @ref CLGatherKernel + * + * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All. + * @param[in] indices Indices tensor info. Supported tensor rank: up to 4. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]) + * @param[in] output Destination tensor info. Data type supported: Same as @p input + * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis = 0); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Source tensor */ + const ICLTensor *_indices; /**< Indices tensor */ + ICLTensor *_output; /**< Destination tensor */ + int _axis; /**< Axis index */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGATHERKERNEL_H */ diff --git a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp b/src/core/CL/kernels/CLGaussian3x3Kernel.cpp index c9ed1ac0d7..40e9658ab4 100644 --- a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp +++ b/src/core/CL/kernels/CLGaussian3x3Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h" +#include "src/core/CL/kernels/CLGaussian3x3Kernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLGaussian3x3Kernel.h b/src/core/CL/kernels/CLGaussian3x3Kernel.h new file mode 100644 index 0000000000..139b05d44c --- /dev/null +++ b/src/core/CL/kernels/CLGaussian3x3Kernel.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H +#define ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the Gaussian 3x3 filter kernel. 
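Editor's illustration (not part of this patch): a small validate() sketch for the CLGatherKernel interface above, gathering along the default axis 0. The shapes reflect the editor's assumption that the output replaces the size of the gathered dimension with the number of indices.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLGatherKernel.h"

// Sketch: gather 8 elements along dimension 0 of a 16 x 4 F32 tensor using U32 indices.
bool gather_config_is_valid()
{
    using namespace arm_compute;
    const TensorInfo input(TensorShape(16U, 4U), 1, DataType::F32);
    const TensorInfo indices(TensorShape(8U), 1, DataType::U32);
    const TensorInfo output(TensorShape(8U, 4U), 1, DataType::F32); // axis-0 size replaced by the index count

    const Status status = CLGatherKernel::validate(&input, &indices, &output, 0 /* axis */);
    return status.error_code() == ErrorCode::OK;
}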
+ * + */ +class CLGaussian3x3Kernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H */ diff --git a/src/core/CL/kernels/CLGaussian5x5Kernel.cpp b/src/core/CL/kernels/CLGaussian5x5Kernel.cpp index 5b3639f025..46a7576154 100644 --- a/src/core/CL/kernels/CLGaussian5x5Kernel.cpp +++ b/src/core/CL/kernels/CLGaussian5x5Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h" +#include "src/core/CL/kernels/CLGaussian5x5Kernel.h" #include diff --git a/src/core/CL/kernels/CLGaussian5x5Kernel.h b/src/core/CL/kernels/CLGaussian5x5Kernel.h new file mode 100644 index 0000000000..711710b3b3 --- /dev/null +++ b/src/core/CL/kernels/CLGaussian5x5Kernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H +#define ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H + +#include "src/core/CL/kernels/CLConvolutionKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run the horizontal pass of 5x5 Gaussian filter on a tensor. */ +class CLGaussian5x5HorKernel : public CLSeparableConvolution5x5HorKernel +{ +public: + /** Initialise the kernel's source, destination and border. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: S16. 
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); + +private: + //Make the configure method of the parent class private + using CLSeparableConvolution5x5HorKernel::configure; +}; + +/** Interface for the kernel to run the vertical pass of 5x5 Gaussian filter on a tensor. */ +class CLGaussian5x5VertKernel : public CLSeparableConvolution5x5VertKernel +{ +public: + /** Initialise the kernel's source, destination and border. + * + * @param[in] input Input tensor(output of horizontal pass). Data types supported: S16. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor(output of horizontal pass). Data types supported: S16. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); + +private: + //Make the configure method of the parent class private + using CLSeparableConvolution5x5VertKernel::configure; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H */ diff --git a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp index 2686e8b32e..065f7f7e92 100644 --- a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp +++ b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h" +#include "src/core/CL/kernels/CLGaussianPyramidKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLGaussianPyramidKernel.h b/src/core/CL/kernels/CLGaussianPyramidKernel.h new file mode 100644 index 0000000000..a6595440f6 --- /dev/null +++ b/src/core/CL/kernels/CLGaussianPyramidKernel.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
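Editor's illustration (not part of this patch): a sketch chaining the two passes of the CLGaussian5x5HorKernel / CLGaussian5x5VertKernel interfaces above, with a U8 source, an S16 intermediate produced by the horizontal pass and a U8 destination from the vertical pass. It assumes the caller pre-initialises the intermediate tensor, uses an undefined border, and builds within the library tree; the CLGaussian5x5 function normally takes care of border filling and the intermediate buffer.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLGaussian5x5Kernel.h"

int main()
{
    using namespace arm_compute;
    CLScheduler::get().default_init();

    constexpr unsigned int width = 64, height = 32;

    CLTensor src, tmp, dst;
    src.allocator()->init(TensorInfo(TensorShape(width, height), 1, DataType::U8));
    tmp.allocator()->init(TensorInfo(TensorShape(width, height), 1, DataType::S16)); // horizontal-pass output
    dst.allocator()->init(TensorInfo(TensorShape(width, height), 1, DataType::U8));

    CLGaussian5x5HorKernel  hor;
    CLGaussian5x5VertKernel vert;
    hor.configure(&src, &tmp, /* border_undefined */ true);
    vert.configure(&tmp, &dst, /* border_undefined */ true);

    src.allocator()->allocate();
    tmp.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src via map()/unmap() in a real run ...

    CLScheduler::get().enqueue(hor);
    CLScheduler::get().enqueue(vert);
    CLScheduler::get().sync();
    return 0;
}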
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H +#define ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H + +#include "src/core/CL/ICLSimpleKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform a Gaussian filter and half scaling across width (horizontal pass) */ +class CLGaussianPyramidHorKernel : public ICLSimpleKernel +{ +public: + /** Default constructor */ + CLGaussianPyramidHorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramidHorKernel(const CLGaussianPyramidHorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramidHorKernel &operator=(const CLGaussianPyramidHorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGaussianPyramidHorKernel(CLGaussianPyramidHorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGaussianPyramidHorKernel &operator=(CLGaussianPyramidHorKernel &&) = default; + /** Default destructor */ + ~CLGaussianPyramidHorKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16. + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + int _l2_load_offset; +}; + +/** OpenCL kernel to perform a Gaussian filter and half scaling across height (vertical pass) */ +class CLGaussianPyramidVertKernel : public ICLSimpleKernel +{ +public: + /** Default constructor */ + CLGaussianPyramidVertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramidVertKernel(const CLGaussianPyramidVertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramidVertKernel &operator=(const CLGaussianPyramidVertKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGaussianPyramidVertKernel(CLGaussianPyramidVertKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGaussianPyramidVertKernel &operator=(CLGaussianPyramidVertKernel &&) = default; + /** Default destructor */ + ~CLGaussianPyramidVertKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data types supported: U16. + * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8. + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U16. + * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + int _t2_load_offset; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H */ diff --git a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp index a2fcbbab78..dd3faf50a2 100644 --- a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp +++ b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h" +#include "src/core/CL/kernels/CLGenerateProposalsLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.h b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.h new file mode 100644 index 0000000000..d26795ac7d --- /dev/null +++ b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGENERATEPROPOSALSLAYERKERNEL_H +#define ARM_COMPUTE_CLGENERATEPROPOSALSLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" +namespace arm_compute +{ +class ICLTensor; + +/** Interface for Compute All Anchors kernel */ +class CLComputeAllAnchorsKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLComputeAllAnchorsKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLComputeAllAnchorsKernel(const CLComputeAllAnchorsKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLComputeAllAnchorsKernel &operator=(const CLComputeAllAnchorsKernel &) = delete; + /** Allow instances of this class to be moved */ + CLComputeAllAnchorsKernel(CLComputeAllAnchorsKernel &&) = default; + /** Allow instances of this class to be moved */ + CLComputeAllAnchorsKernel &operator=(CLComputeAllAnchorsKernel &&) = default; + /** Default destructor */ + ~CLComputeAllAnchorsKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 + * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input + * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo + * + */ + void configure(const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 + * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. 
Data types supported: Same as @p input + * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo + * + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info); + + /** Static function to check if given info will lead to a valid configuration of @ref CLComputeAllAnchorsKernel + * + * @param[in] anchors Source tensor info. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 + * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input + * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo + * + * @return a Status + */ + static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_anchors; + ICLTensor *_all_anchors; +}; +} // namespace arm_compute +#endif // ARM_COMPUTE_CLGENERATEPROPOSALSLAYERKERNEL_H diff --git a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp b/src/core/CL/kernels/CLHOGDescriptorKernel.cpp index eaf5ea4880..cd3f1ee216 100644 --- a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp +++ b/src/core/CL/kernels/CLHOGDescriptorKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h" +#include "src/core/CL/kernels/CLHOGDescriptorKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLHOGDescriptorKernel.h b/src/core/CL/kernels/CLHOGDescriptorKernel.h new file mode 100644 index 0000000000..eee2fa36bc --- /dev/null +++ b/src/core/CL/kernels/CLHOGDescriptorKernel.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H +#define ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H + +#include "arm_compute/core/IHOG.h" +#include "arm_compute/core/Size2D.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** OpenCL kernel to perform HOG Orientation Binning */ +class CLHOGOrientationBinningKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLHOGOrientationBinningKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGOrientationBinningKernel(const CLHOGOrientationBinningKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGOrientationBinningKernel &operator=(const CLHOGOrientationBinningKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHOGOrientationBinningKernel(CLHOGOrientationBinningKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHOGOrientationBinningKernel &operator=(CLHOGOrientationBinningKernel &&) = default; + /** Default destructor */ + ~CLHOGOrientationBinningKernel() = default; + + /** Initialise the kernel's inputs, output and HOG's metadata + * + * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16. + * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8 + * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. Number of channels supported: equal to the number of histogram bins per cell + * @param[in] hog_info HOG's metadata + */ + void configure(const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info); + /** Initialise the kernel's inputs, output and HOG's metadata + * + * @param[in] compile_context The compile context to be used. + * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16. + * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8 + * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. 
Number of channels supported: equal to the number of histogram bins per cell + * @param[in] hog_info HOG's metadata + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input_magnitude; + const ICLTensor *_input_phase; + ICLTensor *_output; + Size2D _cell_size; +}; + +/** OpenCL kernel to perform HOG block normalization */ +class CLHOGBlockNormalizationKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLHOGBlockNormalizationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGBlockNormalizationKernel(const CLHOGBlockNormalizationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGBlockNormalizationKernel &operator=(const CLHOGBlockNormalizationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHOGBlockNormalizationKernel(CLHOGBlockNormalizationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHOGBlockNormalizationKernel &operator=(CLHOGBlockNormalizationKernel &&) = default; + /** Default destructor */ + ~CLHOGBlockNormalizationKernel() = default; + + /** Initialise the kernel's input, output and HOG's metadata + * + * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell + * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block + * @param[in] hog_info HOG's metadata + */ + void configure(const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info); + /** Initialise the kernel's input, output and HOG's metadata + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell + * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block + * @param[in] hog_info HOG's metadata + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + Size2D _num_cells_per_block_stride; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H */ diff --git a/src/core/CL/kernels/CLHOGDetectorKernel.cpp b/src/core/CL/kernels/CLHOGDetectorKernel.cpp index 6e14996732..861155b9a2 100644 --- a/src/core/CL/kernels/CLHOGDetectorKernel.cpp +++ b/src/core/CL/kernels/CLHOGDetectorKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h" +#include "src/core/CL/kernels/CLHOGDetectorKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLHOGDetectorKernel.h b/src/core/CL/kernels/CLHOGDetectorKernel.h new file mode 100644 index 0000000000..c28e6ebe74 --- /dev/null +++ b/src/core/CL/kernels/CLHOGDetectorKernel.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLHOGDETECTORKERNEL_H +#define ARM_COMPUTE_CLHOGDETECTORKERNEL_H + +#include "arm_compute/core/CL/ICLArray.h" +#include "arm_compute/core/CL/ICLHOG.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "src/core/CL/ICLKernel.h" + +namespace cl +{ +class Buffer; +} + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform HOG detector kernel using linear SVM */ +class CLHOGDetectorKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLHOGDetectorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGDetectorKernel(const CLHOGDetectorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGDetectorKernel &operator=(const CLHOGDetectorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHOGDetectorKernel(CLHOGDetectorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHOGDetectorKernel &operator=(CLHOGDetectorKernel &&) = default; + /** Default destructor */ + ~CLHOGDetectorKernel() = default; + + /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect + * + * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block + * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel + * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects + * @param[in] num_detection_windows Number of detected objects + * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. 
+ * It must be multiple of the hog->info()->block_stride() + * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane + * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to + */ + void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, + uint16_t idx_class = 0); + /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block + * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel + * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects + * @param[in] num_detection_windows Number of detected objects + * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. + * It must be multiple of the hog->info()->block_stride() + * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane + * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, + const Size2D &detection_window_stride, float threshold = 0.0f, + uint16_t idx_class = 0); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue); + +private: + const ICLTensor *_input; + ICLDetectionWindowArray *_detection_windows; + cl::Buffer *_num_detection_windows; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLHOGDETECTORKERNEL_H */ diff --git a/src/core/CL/kernels/CLHarrisCornersKernel.cpp b/src/core/CL/kernels/CLHarrisCornersKernel.cpp index 19c4e579a0..cbc056fb77 100644 --- a/src/core/CL/kernels/CLHarrisCornersKernel.cpp +++ b/src/core/CL/kernels/CLHarrisCornersKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h" +#include "src/core/CL/kernels/CLHarrisCornersKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLHarrisCornersKernel.h b/src/core/CL/kernels/CLHarrisCornersKernel.h new file mode 100644 index 0000000000..6482b0aa4e --- /dev/null +++ b/src/core/CL/kernels/CLHarrisCornersKernel.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLHARRISCORNERSKERNEL_H +#define ARM_COMPUTE_CLHARRISCORNERSKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include <cstdint> + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the Harris score kernel. + * + * @note The implementation supports 3, 5, and 7 for the block_size. + */ +class CLHarrisScoreKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLHarrisScoreKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHarrisScoreKernel(const CLHarrisScoreKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHarrisScoreKernel &operator=(const CLHarrisScoreKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHarrisScoreKernel(CLHarrisScoreKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHarrisScoreKernel &operator=(CLHarrisScoreKernel &&) = default; + /** Default destructor */ + ~CLHarrisScoreKernel() = default; + + /** Setup the kernel parameters + * + * @param[in] input1 Source image (gradient X). Data types supported S16, S32. (Must be the same as input2) + * @param[in] input2 Source image (gradient Y). Data types supported S16, S32. (Must be the same as input1) + * @param[out] output Destination image (Harris score). Data types supported F32 + * @param[in] block_size The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7 + * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0) + * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). + * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLImage *input1, const ICLImage *input2, ICLImage *output, + int32_t block_size, float norm_factor, float strength_thresh, float sensitivity, + bool border_undefined); + /** Setup the kernel parameters + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Source image (gradient X). Data types supported S16, S32. (Must be the same as input2) + * @param[in] input2 Source image (gradient Y).
Data types supported S16, S32. (Must be the same as input1) + * @param[out] output Destination image (harris score). Data types supported F32 + * @param[in] block_size The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7 + * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0) + * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). + * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLImage *input1, const ICLImage *input2, ICLImage *output, + int32_t block_size, float norm_factor, float strength_thresh, float sensitivity, + bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +protected: + const ICLImage *_input1; /**< Source image - Gx component */ + const ICLImage *_input2; /**< Source image - Gy component */ + ICLImage *_output; /**< Source image - Harris score */ + float _sensitivity; /**< Sensitivity value */ + float _strength_thresh; /**< Threshold value */ + float _norm_factor; /**< Normalization factor */ + BorderSize _border_size; /**< Border size */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLHARRISCORNERSKERNEL_H */ diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp index 3f5e91e5a1..8aa7366d50 100644 --- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h" +#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.h b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.h new file mode 100644 index 0000000000..f4cb627052 --- /dev/null +++ b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H +#define ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +/** Interface for the height concatenate kernel. + * The input tensor will be concatenated into the output tensor. + */ +class CLHeightConcatenateLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLHeightConcatenateLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHeightConcatenateLayerKernel(const CLHeightConcatenateLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHeightConcatenateLayerKernel &operator=(const CLHeightConcatenateLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHeightConcatenateLayerKernel(CLHeightConcatenateLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHeightConcatenateLayerKernel &operator=(CLHeightConcatenateLayerKernel &&) = default; + /** Default destructor */ + ~CLHeightConcatenateLayerKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[in] height_offset The starting offset on the Y axis for the output tensor. + * @param[out] output Output tensor. Data types supported: Same as @p input. + * + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int height_offset, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLHeightConcatenateLayerKernel + * + * @param[in] input Input tensor info. Data types supported: All. + * @param[in] height_offset The starting offset on the Y axis for the output tensor. + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; + +private: + unsigned int _height_offset; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLHistogramKernel.cpp b/src/core/CL/kernels/CLHistogramKernel.cpp index a85429c1a0..ca5322aa51 100644 --- a/src/core/CL/kernels/CLHistogramKernel.cpp +++ b/src/core/CL/kernels/CLHistogramKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLHistogramKernel.h" +#include "src/core/CL/kernels/CLHistogramKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLDistribution1D.h" diff --git a/src/core/CL/kernels/CLHistogramKernel.h b/src/core/CL/kernels/CLHistogramKernel.h new file mode 100644 index 0000000000..9c97c6590d --- /dev/null +++ b/src/core/CL/kernels/CLHistogramKernel.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLHISTOGRAMKERNEL_H +#define ARM_COMPUTE_CLHISTOGRAMKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLDistribution1D; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface to run the histogram kernel. This kernel processes the part of the image whose width is a multiple of 16. + * If the image width is not a multiple of 16, the remaining pixels have to be processed with the @ref CLHistogramBorderKernel + */ +class CLHistogramKernel : public ICLKernel +{ +public: + /** Constructor */ + CLHistogramKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHistogramKernel(const CLHistogramKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHistogramKernel &operator=(const CLHistogramKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHistogramKernel(CLHistogramKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHistogramKernel &operator=(CLHistogramKernel &&) = default; + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source image. Data types supported: U8. + * @param[out] output Destination distribution. + */ + void configure(const ICLImage *input, ICLDistribution1D *output); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source image. Data types supported: U8. + * @param[out] output Destination distribution.
+ */ + void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLImage *_input; + ICLDistribution1D *_output; +}; + +/** Interface to run the histogram kernel to handle the leftover part of image + * + */ +class CLHistogramBorderKernel : public ICLKernel +{ +public: + /** Constructor */ + CLHistogramBorderKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHistogramBorderKernel(const CLHistogramBorderKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHistogramBorderKernel &operator=(const CLHistogramBorderKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHistogramBorderKernel(CLHistogramBorderKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHistogramBorderKernel &operator=(CLHistogramBorderKernel &&) = default; + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source image. Data types supported: U8. + * @param[out] output Destination distribution. + */ + void configure(const ICLImage *input, ICLDistribution1D *output); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source image. Data types supported: U8. + * @param[out] output Destination distribution. + */ + void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLImage *_input; + ICLDistribution1D *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLHISTOGRAMKERNEL_H*/ diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 76490f82f6..0789cdc8a7 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h" +#include "src/core/CL/kernels/CLIm2ColKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLIm2ColKernel.h b/src/core/CL/kernels/CLIm2ColKernel.h new file mode 100644 index 0000000000..2920c7d138 --- /dev/null +++ b/src/core/CL/kernels/CLIm2ColKernel.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLIM2COLKERNEL_H +#define ARM_COMPUTE_CLIM2COLKERNEL_H + +#include "arm_compute/core/Size2D.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the im2col reshape kernel. + * + * Rearranges image blocks into columns. It is used to strip out each convolution block to a single column. + * It is used to transform a convolution to a plain matrix multiplication. + * + * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have: + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * = + * \left( \begin{array}{ccccccccc} + * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\ + * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\ + * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\ + * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\ + * \end{array} \right) + * @f] + */ +class CLIm2ColKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLIm2ColKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLIm2ColKernel(const CLIm2ColKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLIm2ColKernel &operator=(const CLIm2ColKernel &) = delete; + /** Allow instances of this class to be moved */ + CLIm2ColKernel(CLIm2ColKernel &&) = default; + /** Allow instances of this class to be moved */ + CLIm2ColKernel &operator=(CLIm2ColKernel &&) = default; + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, + * while every dimension above represents a batch. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias In case biases are provided expands the matrix with 1. + * This is valid only for non-quantized inputs. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. + * Number of groups other than 1 is only supported for NCHW data layout. + * Number of groups should be multiple to the number of channels. + */ + void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U), + unsigned int num_groups = 1); + /** Set the input and output of the kernel. 
+ * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, + * while every dimension above represents a batch. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, + const Size2D &dilation = Size2D(1U, 1U), + unsigned int num_groups = 1); + /** Static function to check if given info will lead to a valid configuration of @ref CLIm2ColKernel + * + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, + * while every dimension above represents a batch. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias In case biases are provided expands the matrix with 1. + * This is valid only for non-quantized inputs. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. + * Number of groups other than 1 is only supported for NCHW data layout. + * Number of groups should be multiple to the number of channels. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U), + unsigned int num_groups = 1); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +public: + const ICLTensor *_input; + ICLTensor *_output; + DataLayout _data_layout; + std::pair<unsigned int, unsigned int> _convolved_dims; + unsigned int _num_elems_processed_per_iteration; + Size2D _kernel_dims; + PadStrideInfo _conv_info; + unsigned int _num_groups; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLIM2COLKERNEL_H */ diff --git a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp index e97b856456..4c3b404be7 100644 --- a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h" +#include "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h new file mode 100644 index 0000000000..d4444f0b20 --- /dev/null +++ b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H +#define ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include "arm_compute/core/KernelDescriptors.h" + +namespace arm_compute +{ +// Forward declarations +class ICLTensor; + +/** Interface for performing an instance normalization */ +class CLInstanceNormalizationLayerKernel : public ICLKernel +{ +public: + /** Constructor */ + CLInstanceNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLInstanceNormalizationLayerKernel(const CLInstanceNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLInstanceNormalizationLayerKernel &operator=(const CLInstanceNormalizationLayerKernel &) = delete; + /** Default Move Constructor. */ + CLInstanceNormalizationLayerKernel(CLInstanceNormalizationLayerKernel &&) = default; + /** Default move assignment operator */ + CLInstanceNormalizationLayerKernel &operator=(CLInstanceNormalizationLayerKernel &&) = default; + /** Default destructor */ + ~CLInstanceNormalizationLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC + * In case of @p output tensor = nullptr this tensor will store the result of the normalization. + * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. + * @param[in] info Kernel meta-data descriptor + */ + void configure(ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. 
Data types supported: F16/F32. Data layout supported: NCHW, NHWC + * In case of @p output tensor = nullptr this tensor will store the result of the normalization. + * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. + * @param[in] info Kernel meta-data descriptor + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info); + + /** Static function to check if given info will lead to a valid configuration of @ref CLInstanceNormalizationLayer. + * + * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported: NHWC, NCHW + * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. + * @param[in] info Kernel meta-data descriptor + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_input; + ICLTensor *_output; + bool _run_in_place; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLIntegralImageKernel.cpp b/src/core/CL/kernels/CLIntegralImageKernel.cpp index 82f6da85a5..5e5683d231 100644 --- a/src/core/CL/kernels/CLIntegralImageKernel.cpp +++ b/src/core/CL/kernels/CLIntegralImageKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h" +#include "src/core/CL/kernels/CLIntegralImageKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLIntegralImageKernel.h b/src/core/CL/kernels/CLIntegralImageKernel.h new file mode 100644 index 0000000000..0e40e3afbc --- /dev/null +++ b/src/core/CL/kernels/CLIntegralImageKernel.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H +#define ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H + +#include "src/core/CL/ICLKernel.h" +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface to run the horizontal pass of the integral image kernel. */ +class CLIntegralImageHorKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output Destination tensor, Data types supported: U32. + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output Destination tensor, Data types supported: U32. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); +}; + +/** Interface to run the vertical pass of the integral image kernel. */ +class CLIntegralImageVertKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLIntegralImageVertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLIntegralImageVertKernel(const CLIntegralImageVertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLIntegralImageVertKernel &operator=(const CLIntegralImageVertKernel &) = delete; + /** Allow instances of this class to be moved */ + CLIntegralImageVertKernel(CLIntegralImageVertKernel &&) = default; + /** Allow instances of this class to be moved */ + CLIntegralImageVertKernel &operator=(CLIntegralImageVertKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in,out] in_out The input/output tensor. Data types supported: U32 + */ + void configure(ICLTensor *in_out); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] in_out The input/output tensor. Data types supported: U32 + */ + void configure(const CLCompileContext &compile_context, ICLTensor *in_out); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_in_out; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H */ diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp index 9936e29c5f..9e91d98f7c 100644 --- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp +++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h" +#include "src/core/CL/kernels/CLL2NormalizeLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.h b/src/core/CL/kernels/CLL2NormalizeLayerKernel.h new file mode 100644 index 0000000000..edc0585217 --- /dev/null +++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H +#define ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for performing a L2 normalize on a given axis given the square sum of it in this axis */ +class CLL2NormalizeLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLL2NormalizeLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLL2NormalizeLayerKernel(const CLL2NormalizeLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLL2NormalizeLayerKernel &operator=(const CLL2NormalizeLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLL2NormalizeLayerKernel(CLL2NormalizeLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLL2NormalizeLayerKernel &operator=(CLL2NormalizeLayerKernel &&) = default; + /** Default destructor */ + ~CLL2NormalizeLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[in] sum Sum values tensor. Data types supported: same as @p input. + * Sum will have the same number of dimensions as input. + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 + * @param[in] epsilon Lower bound value for the normalization. + */ + void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[in] sum Sum values tensor. Data types supported: same as @p input. + * Sum will have the same number of dimensions as input. + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. 
Negative values wrap around. Maximum supported actual reduction axis : 2 + * @param[in] epsilon Lower bound value for the normalization. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon); + + /** Static function to check if given info will lead to a valid configuration of @ref CLL2NormalizeLayerKernel. + * + * @param[in] input Source tensor info. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[in] sum Sum values tensor info. Data types supported: same as @p input. + * Sum will have the same number of dimensions as input. + * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input. + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 + * @param[in] epsilon Lower bound value for the normalization. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_sum; + ICLTensor *_output; + unsigned int _actual_axis; + float _epsilon; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLLKTrackerKernel.cpp b/src/core/CL/kernels/CLLKTrackerKernel.cpp index 0fa2e703ec..a439c2448e 100644 --- a/src/core/CL/kernels/CLLKTrackerKernel.cpp +++ b/src/core/CL/kernels/CLLKTrackerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h" +#include "src/core/CL/kernels/CLLKTrackerKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLArray.h" diff --git a/src/core/CL/kernels/CLLKTrackerKernel.h b/src/core/CL/kernels/CLLKTrackerKernel.h new file mode 100644 index 0000000000..2d2966854a --- /dev/null +++ b/src/core/CL/kernels/CLLKTrackerKernel.h @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#ifndef ARM_COMPUTE_CLLKTRACKERKERNEL_H
+#define ARM_COMPUTE_CLLKTRACKERKERNEL_H
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface to run the initialization step of LKTracker */
+class CLLKTrackerInitKernel : public ICLKernel
+{
+public:
+    /** Initialise the kernel input and output
+     *
+     * @param[in]  old_points           Pointer to the @ref ICLKeyPointArray storing old key points
+     * @param[in]  new_points_estimates Pointer to the @ref ICLKeyPointArray storing new estimates key points
+     * @param[out] old_points_internal  Pointer to the array of internal @ref CLLKInternalKeypoint old points
+     * @param[out] new_points_internal  Pointer to the array of internal @ref CLLKInternalKeypoint new points
+     * @param[in]  use_initial_estimate The flag to indicate whether the initial estimated position should be used
+     * @param[in]  level                The pyramid level
+     * @param[in]  num_levels           The number of pyramid levels
+     * @param[in]  pyramid_scale        Scale factor used for generating the pyramid
+     */
+    void configure(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
+                   ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
+                   bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale);
+    /** Initialise the kernel input and output
+     *
+     * @param[in]  compile_context      The compile context to be used.
+     * @param[in]  old_points           Pointer to the @ref ICLKeyPointArray storing old key points
+     * @param[in]  new_points_estimates Pointer to the @ref ICLKeyPointArray storing new estimates key points
+     * @param[out] old_points_internal  Pointer to the array of internal @ref CLLKInternalKeypoint old points
+     * @param[out] new_points_internal  Pointer to the array of internal @ref CLLKInternalKeypoint new points
+     * @param[in]  use_initial_estimate The flag to indicate whether the initial estimated position should be used
+     * @param[in]  level                The pyramid level
+     * @param[in]  num_levels           The number of pyramid levels
+     * @param[in]  pyramid_scale        Scale factor used for generating the pyramid
+     */
+    void configure(const CLCompileContext &compile_context, const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
+                   ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
+                   bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+};
+
+/** Interface to run the finalize step of LKTracker, where it truncates the coordinates stored in new_points array */
+class CLLKTrackerFinalizeKernel : public ICLKernel
+{
+public:
+    /** Initialise the kernel input and output
+     *
+     * @param[in]  new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points
+     * @param[out] new_points          Pointer to the @ref ICLKeyPointArray storing new key points
+     */
+    void configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points);
+    /** Initialise the kernel input and output
+     *
+     * @param[in]  compile_context     The compile context to be used.
+ * @param[in] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points + * @param[out] new_points Pointer to the @ref ICLKeyPointArray storing new key points + */ + void configure(const CLCompileContext &compile_context, ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; + +/** Interface to run the first stage of LKTracker, where A11, A12, A22, min_eig, ival, ixval and iyval are computed */ +class CLLKTrackerStage0Kernel : public ICLKernel +{ +public: + /** Default constructor */ + CLLKTrackerStage0Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLKTrackerStage0Kernel(const CLLKTrackerStage0Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLKTrackerStage0Kernel &operator=(const CLLKTrackerStage0Kernel &) = delete; + /** Allow instances of this class to be moved */ + CLLKTrackerStage0Kernel(CLLKTrackerStage0Kernel &&) = default; + /** Allow instances of this class to be moved */ + CLLKTrackerStage0Kernel &operator=(CLLKTrackerStage0Kernel &&) = default; + /** Initialise the kernel input and output + * + * @param[in] old_input Pointer to the input old tensor. Data types supported: U8 + * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data types supported: S16 + * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. Data types supported: S16 + * @param[in] old_points_internal Pointer to the array of CLLKInternalKeypoint old points + * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint new points + * @param[out] coeff_table Pointer to the array holding the Spatial Gradient coefficients + * @param[out] old_ival Pointer to the array holding internal values + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] level The pyramid level + */ + void configure(const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy, + ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, + ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, + size_t window_dimension, size_t level); + /** Initialise the kernel input and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] old_input Pointer to the input old tensor. Data types supported: U8 + * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data types supported: S16 + * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. 
Data types supported: S16 + * @param[in] old_points_internal Pointer to the array of CLLKInternalKeypoint old points + * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint new points + * @param[out] coeff_table Pointer to the array holding the Spatial Gradient coefficients + * @param[out] old_ival Pointer to the array holding internal values + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] level The pyramid level + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy, + ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, + ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, + size_t window_dimension, size_t level); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_old_input; + const ICLTensor *_old_scharr_gx; + const ICLTensor *_old_scharr_gy; +}; + +/** Interface to run the second stage of LKTracker, where the motion vectors of the given points are computed */ +class CLLKTrackerStage1Kernel : public ICLKernel +{ +public: + /** Default constructor */ + CLLKTrackerStage1Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLKTrackerStage1Kernel(const CLLKTrackerStage1Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLKTrackerStage1Kernel &operator=(const CLLKTrackerStage1Kernel &) = delete; + /** Allow instances of this class to be moved */ + CLLKTrackerStage1Kernel(CLLKTrackerStage1Kernel &&) = default; + /** Allow instances of this class to be moved */ + CLLKTrackerStage1Kernel &operator=(CLLKTrackerStage1Kernel &&) = default; + /** Initialise the kernel input and output + * + * @param[in] new_input Pointer to the input new tensor. Data types supported: U8 + * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint for new points + * @param[in] coeff_table Pointer to the array holding the Spatial Gradient coefficients + * @param[in] old_ival Pointer to the array holding internal values + * @param[in] termination The criteria to terminate the search of each keypoint. + * @param[in] epsilon The error for terminating the algorithm + * @param[in] num_iterations The maximum number of iterations before terminating the algorithm + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] level The pyramid level + */ + void configure(const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, + Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level); + /** Initialise the kernel input and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] new_input Pointer to the input new tensor. Data types supported: U8 + * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint for new points + * @param[in] coeff_table Pointer to the array holding the Spatial Gradient coefficients + * @param[in] old_ival Pointer to the array holding internal values + * @param[in] termination The criteria to terminate the search of each keypoint. 
+ * @param[in] epsilon The error for terminating the algorithm + * @param[in] num_iterations The maximum number of iterations before terminating the algorithm + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] level The pyramid level + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, + Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_new_input; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLLKTRACKERKERNEL_H */ diff --git a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp index 6e4c45eab7..49e04c32c2 100644 --- a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h" +#include "src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h new file mode 100644 index 0000000000..5d0a22afa5 --- /dev/null +++ b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H +#define ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to multiply each row of first tensor with low 2 dimensions of second tensor. 
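+ *
+ * A minimal configuration sketch (illustrative only; it assumes F32 CLTensor objects
+ * @p input0, @p input1 and @p output that are already initialised and allocated, and uses
+ * the runtime CLScheduler to launch the kernel):
+ * @code
+ * CLLocallyConnectedMatrixMultiplyKernel mm_kernel;
+ * mm_kernel.configure(&input0, &input1, &output); // input1 must have at least 2 dimensions
+ * CLScheduler::get().enqueue(mm_kernel);          // run the configured kernel on the CL queue
+ * @endcode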
+ * + * @attention The second input tensor must have at least 2 dimensions (matrix) + * + */ +class CLLocallyConnectedMatrixMultiplyKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLLocallyConnectedMatrixMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLocallyConnectedMatrixMultiplyKernel(const CLLocallyConnectedMatrixMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLocallyConnectedMatrixMultiplyKernel &operator=(const CLLocallyConnectedMatrixMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + CLLocallyConnectedMatrixMultiplyKernel(CLLocallyConnectedMatrixMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + CLLocallyConnectedMatrixMultiplyKernel &operator=(CLLocallyConnectedMatrixMultiplyKernel &&) = default; + /** Initialise the kernel's input, output and alpha + * + * @param[in] input0 First input tensor. Data types supported: F32 + * @param[in] input1 Second input tensor. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result. Data type supported: same as @p input0 + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); + /** Initialise the kernel's input, output and alpha + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 First input tensor. Data types supported: F32 + * @param[in] input1 Second input tensor. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result. Data type supported: same as @p input0 + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLLocallyConnectedMatrixMultiplyKernel + * + * @param[in] input0 First input tensor info. Data types supported: F32 + * @param[in] input1 Second input tensor info. Data type supported: same as @p input0 + * @param[in] output Output tensor info. Data type supported: same as @p input0 + * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H */ diff --git a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp b/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp index dc130d0ff9..9845dd6169 100644 --- a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp +++ b/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h" +#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLMagnitudePhaseKernel.h b/src/core/CL/kernels/CLMagnitudePhaseKernel.h new file mode 100644 index 0000000000..514036b2ff --- /dev/null +++ b/src/core/CL/kernels/CLMagnitudePhaseKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H +#define ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Template interface for the kernel to compute magnitude and phase. + * + */ +class CLMagnitudePhaseKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLMagnitudePhaseKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMagnitudePhaseKernel(const CLMagnitudePhaseKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMagnitudePhaseKernel &operator=(const CLMagnitudePhaseKernel &) = delete; + /** Allow instances of this class to be moved */ + CLMagnitudePhaseKernel(CLMagnitudePhaseKernel &&) = default; + /** Allow instances of this class to be moved */ + CLMagnitudePhaseKernel &operator=(CLMagnitudePhaseKernel &&) = default; + /** Initialise the kernel's input, output. + * + * @note At least one of output1 or output2 must be set. + * + * @param[in] gx The input gradient X tensor. Data types supported: S16/S32. + * @param[in] gy The input gradient Y tensor. Data types supported: S16/S32. + * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16/S32. + * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8. + * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM. + * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. + */ + void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, + MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED); + /** Initialise the kernel's input, output. + * + * @note At least one of output1 or output2 must be set. + * + * @param[in] compile_context The compile context to be used. + * @param[in] gx The input gradient X tensor. Data types supported: S16/S32. + * @param[in] gy The input gradient Y tensor. Data types supported: S16/S32. + * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16/S32. + * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8. + * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM. 
+ * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, + MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_gx; /**< Input gradient X. */ + const ICLTensor *_gy; /**< Input gradient Y. */ + ICLTensor *_magnitude; /**< Output - Magnitude. */ + ICLTensor *_phase; /**< Output - Phase. */ + bool _run_mag; /**< Calculate magnitude ? */ + bool _run_phase; /**< Calculate phase ? */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H */ diff --git a/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp index a78996ddae..2a1312af94 100644 --- a/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h" +#include "src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h new file mode 100644 index 0000000000..86267ec0f7 --- /dev/null +++ b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLMAXUNPOOLINGLAYERKERNEL_H +#define ARM_COMPUTE_CLMAXUNPOOLINGLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the pooling layer kernel */ +class CLMaxUnpoolingLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLMaxUnpoolingLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMaxUnpoolingLayerKernel(const CLMaxUnpoolingLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMaxUnpoolingLayerKernel &operator=(const CLMaxUnpoolingLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLMaxUnpoolingLayerKernel(CLMaxUnpoolingLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLMaxUnpoolingLayerKernel &operator=(CLMaxUnpoolingLayerKernel &&) = default; + /** Default destructor */ + ~CLMaxUnpoolingLayerKernel() = default; + /** Set the input and output tensors. + * + * @note Output shape must be equal to the shape of the original input to pool. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] indices Tensor containing the offset to store the input elements in the output tensor. + * @ref CLPoolingLayerKernel with indices should precede this function in order to + * properly reconstruct the output tensor. + * The tensor shape of this tensor has to be equal to the input tensor shape. Data type supported: U32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, const PoolingLayerInfo &pool_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLMaxUnpoolingLayerKernel + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] output Destination tensor info. Data types supported: Same as @p input. + * @param[in] indices TensorInfo associated to the tensor containing the offset to store the input elements in the output tensor. + * @ref CLPoolingLayerKernel with indices should precede this function in order to + * properly reconstruct the output tensor. + * The tensor shape of this tensor has to be equal to the input tensor shape. Data type supported: U32. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info); + + // Inherited methods overridden + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + const ICLTensor *_indices; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLMAXUNPOOLINGLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp index 5acc3ac3d6..aed6e6eaf7 100644 --- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp +++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h" +#include "src/core/CL/kernels/CLMeanStdDevKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.h b/src/core/CL/kernels/CLMeanStdDevKernel.h new file mode 100644 index 0000000000..179a2025b7 --- /dev/null +++ b/src/core/CL/kernels/CLMeanStdDevKernel.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLMEANSTDDEVKERNEL_H +#define ARM_COMPUTE_CLMEANSTDDEVKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace cl +{ +class Buffer; +} + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the kernel to calculate mean and standard deviation of input image pixels. */ +class CLMeanStdDevKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLMeanStdDevKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMeanStdDevKernel(const CLMeanStdDevKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMeanStdDevKernel &operator=(const CLMeanStdDevKernel &) = delete; + /** Allow instances of this class to be moved */ + CLMeanStdDevKernel(CLMeanStdDevKernel &&) = default; + /** Allow instances of this class to be moved */ + CLMeanStdDevKernel &operator=(CLMeanStdDevKernel &&) = default; + /** Initialise the kernel's input and outputs. + * + * @param[in] input Input image. Data types supported: U8. + * @param[out] mean Input average pixel value. + * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong). + * @param[out] stddev (Optional) Output standard deviation of pixel values. + * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong). + */ + void configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr); + /** Initialise the kernel's input and outputs. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input image. Data types supported: U8. + * @param[out] mean Input average pixel value. + * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong). 
+ * @param[out] stddev (Optional) Output standard deviation of pixel values. + * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong). + */ + void configure(const CLCompileContext &compile_context, const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr); + /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevKernel. + * + * @param[in] input Input image info. Data types supported: U8. + * @param[in] mean Input average pixel value. + * @param[in] global_sum Keeps global sum of pixel values. + * @param[in] stddev (Optional) Output standard deviation of pixel values. + * @param[in] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + + BorderSize border_size() const override; + +private: + const ICLImage *_input; + float *_mean; + float *_stddev; + cl::Buffer *_global_sum; + cl::Buffer *_global_sum_squared; + BorderSize _border_size; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLMEANSTDDEVKERNEL_H */ diff --git a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp index 82a22a9f19..a889df7930 100644 --- a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp +++ b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h" +#include "src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h new file mode 100644 index 0000000000..a1ba2b905e --- /dev/null +++ b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H +#define ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to normalize the input 2D tensor across the first dimension with respect to mean and standard deviation of the same dimension. */ +class CLMeanStdDevNormalizationKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLMeanStdDevNormalizationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMeanStdDevNormalizationKernel(const CLMeanStdDevNormalizationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMeanStdDevNormalizationKernel &operator=(const CLMeanStdDevNormalizationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLMeanStdDevNormalizationKernel(CLMeanStdDevNormalizationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLMeanStdDevNormalizationKernel &operator=(CLMeanStdDevNormalizationKernel &&) = default; + /** Default destructor */ + ~CLMeanStdDevNormalizationKernel() = default; + /** Initialise the kernel's input and outputs. + * + * @note If the output tensor is a nullptr, the normalization will be performed in-place. + * + * @param[in, out] input Source tensor with 2 dimensions. In case of @p output tensor = nullptr, + * this tensor will store the result of the normalization. Data types supported: F16/F32. + * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input + * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. + */ + void configure(ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f); + /** Initialise the kernel's input and outputs. + * + * @note If the output tensor is a nullptr, the normalization will be performed in-place. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor with 2 dimensions. In case of @p output tensor = nullptr, + * this tensor will store the result of the normalization. Data types supported: F16/F32. + * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input + * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f); + /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevNormalizationKernel + * + * @param[in] input Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr, + * this tensor will store the result of the normalization. Data types supported: F16/F32. + * @param[in] output (Optional) Destination tensor info. It can be nullptr in case of in-place computation. 
Data type supported: same as @p input + * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output = nullptr, float epsilon = 1e-8f); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_input; + ICLTensor *_output; + bool _run_in_place; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H */ diff --git a/src/core/CL/kernels/CLMedian3x3Kernel.cpp b/src/core/CL/kernels/CLMedian3x3Kernel.cpp index 4b899502f9..23a21d6b19 100644 --- a/src/core/CL/kernels/CLMedian3x3Kernel.cpp +++ b/src/core/CL/kernels/CLMedian3x3Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLMedian3x3Kernel.h" +#include "src/core/CL/kernels/CLMedian3x3Kernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLMedian3x3Kernel.h b/src/core/CL/kernels/CLMedian3x3Kernel.h new file mode 100644 index 0000000000..8cc5ed7279 --- /dev/null +++ b/src/core/CL/kernels/CLMedian3x3Kernel.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLMEDIAN3X3KERNEL_H +#define ARM_COMPUTE_CLMEDIAN3X3KERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the median 3x3 filter kernel. + * + */ +class CLMedian3x3Kernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. 
False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLMEDIAN3X3KERNEL_H */ diff --git a/src/core/CL/kernels/CLMemsetKernel.cpp b/src/core/CL/kernels/CLMemsetKernel.cpp index 186ed2a38c..2543b07a1a 100644 --- a/src/core/CL/kernels/CLMemsetKernel.cpp +++ b/src/core/CL/kernels/CLMemsetKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLMemsetKernel.h" +#include "src/core/CL/kernels/CLMemsetKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/CL/kernels/CLMemsetKernel.h b/src/core/CL/kernels/CLMemsetKernel.h new file mode 100644 index 0000000000..dc103f580f --- /dev/null +++ b/src/core/CL/kernels/CLMemsetKernel.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLMEMSETKERNEL_H +#define ARM_COMPUTE_CLMEMSETKERNEL_H + +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for filling the planes of a tensor */ +class CLMemsetKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLMemsetKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMemsetKernel(const CLMemsetKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMemsetKernel &operator=(const CLMemsetKernel &) = delete; + /** Allow instances of this class to be moved */ + CLMemsetKernel(CLMemsetKernel &&) = default; + /** Allow instances of this class to be moved */ + CLMemsetKernel &operator=(CLMemsetKernel &&) = default; + /** Default destructor */ + ~CLMemsetKernel() = default; + + /** Initialise the kernel's tensor and filling value + * + * @param[in,out] tensor Input tensor to fill. Supported data types: All. + * @param[in] constant_value The value used to fill the planes of the tensor + * @param[in] window Window to be used in case setting only part of a tensor. 
Default is nullptr. + */ + void configure(ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr); + /** Initialise the kernel's tensor and filling value + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] tensor Input tensor to fill. Supported data types: All. + * @param[in] constant_value The value used to fill the planes of the tensor + * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr); + /** Static function to check if given info will lead to a valid configuration of @ref CLMemsetKernel + * + * @param[in] tensor Source tensor info. Data types supported: All. + * @param[in] constant_value The value used to fill the planes of the tensor + * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr. + * + * @return a status + */ + static Status validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *window = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_tensor; + Window _full_window; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLMEMSETRKERNEL_H */ diff --git a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp index bf645f82e9..7017efa3c2 100644 --- a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h" +#include "src/core/CL/kernels/CLMinMaxLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLMinMaxLayerKernel.h b/src/core/CL/kernels/CLMinMaxLayerKernel.h new file mode 100644 index 0000000000..aa2ff3f375 --- /dev/null +++ b/src/core/CL/kernels/CLMinMaxLayerKernel.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLMINMAXLAYERKERNEL_H +#define ARM_COMPUTE_CLMINMAXLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to perform min max search on a 3D tensor. + */ +class CLMinMaxLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLMinMaxLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMinMaxLayerKernel(const CLMinMaxLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMinMaxLayerKernel &operator=(const CLMinMaxLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLMinMaxLayerKernel(CLMinMaxLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLMinMaxLayerKernel &operator=(CLMinMaxLayerKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.Data types supported: F32. + * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor. + * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32. + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.Data types supported: F32. + * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor. + * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLMinMaxLayerKernel + * + * @param[in] input Input tensor info. Data types supported: F32. + * @param[in] output Output tensor info with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor. + * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + /** Resets global minimum and maximum + * + * @param[in,out] queue Command queue on which to map and unmap the min_max tensor + */ + void reset(cl::CommandQueue &queue); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLMINMAXLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp index 634b58077a..675cfc19a9 100644 --- a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp +++ b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
 */
-#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h"
+#include "src/core/CL/kernels/CLMinMaxLocationKernel.h"
 
 #include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/src/core/CL/kernels/CLMinMaxLocationKernel.h b/src/core/CL/kernels/CLMinMaxLocationKernel.h
new file mode 100644
index 0000000000..2196abe033
--- /dev/null
+++ b/src/core/CL/kernels/CLMinMaxLocationKernel.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H
+#define ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "src/core/CL/ICLKernel.h"
+
+#include <array>
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface for the kernel to perform min max search on an image.
+ */
+class CLMinMaxKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLMinMaxKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMinMaxKernel(const CLMinMaxKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMinMaxKernel &operator=(const CLMinMaxKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLMinMaxKernel(CLMinMaxKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLMinMaxKernel &operator=(CLMinMaxKernel &&) = default;
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input   Input Image. Data types supported: U8/S16/F32.
+     * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
+     */
+    void configure(const ICLImage *input, cl::Buffer *min_max);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input Image. Data types supported: U8/S16/F32.
+     * @param[out] min_max         Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
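+     *
+     * A minimal usage sketch (illustrative only; it assumes @p input points to an allocated U8
+     * image and @p min_max is a two-element cl::Buffer created by the caller):
+     * @code
+     * CLMinMaxKernel min_max_kernel;
+     * min_max_kernel.configure(compile_context, &image, &min_max_buffer);
+     * CLScheduler::get().enqueue(min_max_kernel); // launch through the runtime scheduler
+     * @endcode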
+ */ + void configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Input image. */ + cl::Buffer *_min_max; /**< Minimum/maximum value. */ + std::array _data_type_max_min; /**< Maximum and minimum data type value respectively. */ +}; + +/** Interface for the kernel to find min max locations of an image. + */ +class CLMinMaxLocationKernel : public ICLKernel +{ +public: + /** Constructor */ + CLMinMaxLocationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMinMaxLocationKernel(const CLMinMaxLocationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMinMaxLocationKernel &operator=(const CLMinMaxLocationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLMinMaxLocationKernel(CLMinMaxLocationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLMinMaxLocationKernel &operator=(CLMinMaxLocationKernel &&) = default; + /** Initialise the kernel's input and outputs. + * + * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size. + * + * @param[in] input Input image. Data types supported: U8/S16/F32. + * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32. + * @param[out] min_max_count Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32 + * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations. + * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations. + */ + void configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, + ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr); + /** Initialise the kernel's input and outputs. + * + * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input image. Data types supported: U8/S16/F32. + * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32. + * @param[out] min_max_count Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32 + * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations. + * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations. + */ + void configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, + ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLImage *_input; /**< Input image. */ + cl::Buffer *_min_max_count; /**< Minimum/maximum value occurrences. 
*/ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H */ diff --git a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp b/src/core/CL/kernels/CLNonLinearFilterKernel.cpp index 0a8472bf04..c73acaf1d8 100644 --- a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp +++ b/src/core/CL/kernels/CLNonLinearFilterKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h" +#include "src/core/CL/kernels/CLNonLinearFilterKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLNonLinearFilterKernel.h b/src/core/CL/kernels/CLNonLinearFilterKernel.h new file mode 100644 index 0000000000..ed42063d2b --- /dev/null +++ b/src/core/CL/kernels/CLNonLinearFilterKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H +#define ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLSimple2DKernel.h" + +#include <cstdint> + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to apply a non-linear filter */ +class CLNonLinearFilterKernel : public ICLSimple2DKernel +{ +public: + /** Default constructor */ + CLNonLinearFilterKernel(); + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data types supported: U8 + * @param[out] output Destination tensor. Data types supported: U8 + * @param[in] function Non linear function to perform + * @param[in] mask_size Mask size. Supported sizes: 3, 5 + * @param[in] pattern Mask pattern + * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, + unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, + bool border_undefined); + /** Set the source, destination and border mode of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8 + * @param[out] output Destination tensor.
Data types supported: U8 + * @param[in] function Non linear function to perform + * @param[in] mask_size Mask size. Supported sizes: 3, 5 + * @param[in] pattern Mask pattern + * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, + unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, + bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; + +private: + BorderSize _border_size; /**< Border size */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H */ diff --git a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp index 9c6d44b6c5..7d5c5ba7e1 100644 --- a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp +++ b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h" +#include "src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h new file mode 100644 index 0000000000..d9ed60ce6b --- /dev/null +++ b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H +#define ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface to perform Non-Maxima suppression over a 3x3 window using OpenCL + * + * @note Used by @ref CLFastCorners and @ref CLHarrisCorners + */ +class CLNonMaximaSuppression3x3Kernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's sources, destinations and border mode. + * + * @param[in] input Source tensor. Data types supported: U8, F32. 
(Must be the same as the output tensor) + * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor) + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /** Initialise the kernel's sources, destinations and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor) + * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor) + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H */ diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp index 686e6f1b26..d1982e77b9 100644 --- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h" +#include "src/core/CL/kernels/CLNormalizationLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.h b/src/core/CL/kernels/CLNormalizationLayerKernel.h new file mode 100644 index 0000000000..739a2ae9f1 --- /dev/null +++ b/src/core/CL/kernels/CLNormalizationLayerKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H +#define ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the normalization layer kernel. 
+ */ +class CLNormalizationLayerKernel : public ICLKernel +{ +public: + /** Constructor */ + CLNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLNormalizationLayerKernel(const CLNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLNormalizationLayerKernel &operator=(const CLNormalizationLayerKernel &) = delete; + /** Default Move Constructor. */ + CLNormalizationLayerKernel(CLNormalizationLayerKernel &&) = default; + /** Default move assignment operator */ + CLNormalizationLayerKernel &operator=(CLNormalizationLayerKernel &&) = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input. + * Data layouts supported: same as @p input. + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. + */ + void configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input. + * Data layouts supported: same as @p input. + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizationLayerKernel + * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[in] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input. + * Data layouts supported: same as @p input. + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. 
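+ *
+ * Example (illustrative sketch added for this documentation, not part of the original header): a minimal
+ * validate() call; the shapes and the cross-map normalization settings below are example values only.
+ * @code
+ * const TensorInfo src(TensorShape(16U, 16U, 8U), 1, DataType::F32);
+ * const TensorInfo dst(TensorShape(16U, 16U, 8U), 1, DataType::F32);
+ * const Status     status = CLNormalizationLayerKernel::validate(&src, &dst, NormalizationLayerInfo(NormType::CROSS_MAP, 5));
+ * @endcode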
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, NormalizationLayerInfo norm_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + BorderSize _border_size; + bool _is_norm_across_width; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp index 407ce6626b..18cbe217be 100644 --- a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp +++ b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h" +#include "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h new file mode 100644 index 0000000000..6db4433e78 --- /dev/null +++ b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H +#define ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the NormalizePlanarYUV layer kernel. */ +class CLNormalizePlanarYUVLayerKernel : public ICLKernel +{ +public: + /** Constructor */ + CLNormalizePlanarYUVLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLNormalizePlanarYUVLayerKernel(const CLNormalizePlanarYUVLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLNormalizePlanarYUVLayerKernel &operator=(const CLNormalizePlanarYUVLayerKernel &) = delete; + /** Default Move Constructor. 
*/ + CLNormalizePlanarYUVLayerKernel(CLNormalizePlanarYUVLayerKernel &&) = default; + /** Default move assignment operator */ + CLNormalizePlanarYUVLayerKernel &operator=(CLNormalizePlanarYUVLayerKernel &&) = default; + /** Default destructor */ + ~CLNormalizePlanarYUVLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels]. + * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input + * @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels. + * Data types supported: same as @p input + */ + void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels]. + * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input + * @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels. + * Data types supported: same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std); + /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizePlanarYUVLayerKernel + * + * @param[in] input Source tensor info. 3 lower dimensions represent a single input with dimensions [width, height, channels]. + * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor info. Data type supported: same as @p input + * @param[in] mean Mean values tensor info. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input + * @param[in] std Standard deviation values tensor info. 1 dimension with size equal to the number of input channels. + * Data types supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + const ICLTensor *_mean; + const ICLTensor *_std; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLPadLayerKernel.cpp b/src/core/CL/kernels/CLPadLayerKernel.cpp index 45729738fb..485676667c 100644 --- a/src/core/CL/kernels/CLPadLayerKernel.cpp +++ b/src/core/CL/kernels/CLPadLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h" +#include "src/core/CL/kernels/CLPadLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLPadLayerKernel.h b/src/core/CL/kernels/CLPadLayerKernel.h new file mode 100644 index 0000000000..2b0abb18df --- /dev/null +++ b/src/core/CL/kernels/CLPadLayerKernel.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLPADLAYERKERNEL_H +#define ARM_COMPUTE_CLPADLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the PadLayer function. */ +class CLPadLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLPadLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPadLayerKernel(const CLPadLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPadLayerKernel &operator=(const CLPadLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLPadLayerKernel(CLPadLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLPadLayerKernel &operator=(CLPadLayerKernel &&) = default; + /** Default destructor */ + ~CLPadLayerKernel() = default; + /** Set the input and output tensor. + * + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: same as @p input + * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] + * specifies the front and the end padding in the i-th dimension. + * @param[in] constant_value (Optional) Constant value to be used for the padding. + * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT, + * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT). + */ + void configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT); + /** Set the input and output tensor. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Output tensor. 
Data type supported: same as @p input + * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] + * specifies the front and the end padding in the i-th dimension. + * @param[in] constant_value (Optional) Constant value to be used for the padding. + * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT, + * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT). + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), + PaddingMode mode = PaddingMode::CONSTANT); + /** Static function to check if given info will lead to a valid configuration of @ref CLPadLayerKernel + * + * @param[in] input Source tensor info. Data types supported: All. + * @param[in] output Output tensor info. Data type supported: same as @p input + * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] + * specifies the front and the end padding in the i-th dimension. + * @param[in] constant_value (Optional) Constant value to be used for the padding. + * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT, + * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT). + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + int _input_start_x; + int _input_start_y; + bool _4d_enabled; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLPADLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLPermuteKernel.cpp b/src/core/CL/kernels/CLPermuteKernel.cpp index 620665791f..4d289f28e6 100644 --- a/src/core/CL/kernels/CLPermuteKernel.cpp +++ b/src/core/CL/kernels/CLPermuteKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLPermuteKernel.h" +#include "src/core/CL/kernels/CLPermuteKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/helpers/AutoConfiguration.h" diff --git a/src/core/CL/kernels/CLPermuteKernel.h b/src/core/CL/kernels/CLPermuteKernel.h new file mode 100644 index 0000000000..d1bb875d7a --- /dev/null +++ b/src/core/CL/kernels/CLPermuteKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLPERMUTEKERNEL_H +#define ARM_COMPUTE_CLPERMUTEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform tensor permutation. + * + * Permutes given a permutation vector + */ +class CLPermuteKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLPermuteKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPermuteKernel(const CLPermuteKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPermuteKernel &operator=(const CLPermuteKernel &) = delete; + /** Allow instances of this class to be moved */ + CLPermuteKernel(CLPermuteKernel &&) = default; + /** Allow instances of this class to be moved */ + CLPermuteKernel &operator=(CLPermuteKernel &&) = default; + /** Set the input and output of the kernel. + * + * @note Arbitrary permutation vectors are supported with rank not greater than 4 + * + * @param[in] input The input tensor to permute. Data types supported: All. + * @param[in] output The output tensor. Data types supported: Same as @p input + * @param[in] perm Permutation vector + */ + void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm); + /** Set the input and output of the kernel. + * + * @note Arbitrary permutation vectors are supported with rank not greater than 4 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to permute. Data types supported: All. + * @param[in] output The output tensor. Data types supported: Same as @p input + * @param[in] perm Permutation vector + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PermutationVector &perm); + /** Static function to check if given info will lead to a valid configuration of @ref CLPermuteKernel + * + * @note Arbitrary permutation vectors are supported with rank not greater than 4 + * + * @param[in] input First tensor input info. Data types supported: All. + * @param[in] output Output tensor info. Data types supported: same as @p input. + * @param[in] perm Permutation vector + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + PermutationVector _perm; +}; +} // arm_compute +#endif /*ARM_COMPUTE_CLPERMUTEKERNEL_H */ diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp index a7bd4dad60..a6255f8018 100644 --- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp +++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h" +#include "src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h new file mode 100644 index 0000000000..0cc4005875 --- /dev/null +++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H +#define ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ICLTensor; + +/** Interface for the pixelwise multiplication kernel. */ +class CLPixelWiseMultiplicationKernel : public ICLKernel +{ +public: + /** Default constructor.*/ + CLPixelWiseMultiplicationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPixelWiseMultiplicationKernel(const CLPixelWiseMultiplicationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPixelWiseMultiplicationKernel &operator=(const CLPixelWiseMultiplicationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLPixelWiseMultiplicationKernel(CLPixelWiseMultiplicationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLPixelWiseMultiplicationKernel &operator=(CLPixelWiseMultiplicationKernel &&) = default; + /** Initialise the kernel's input, output and border mode. + * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,U8) -> S16 + * - (S16,S16) -> S16 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * - (QASYMM8,QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (QSYMM16,QSYMM16) -> QSYMM16 + * - (QSYMM16,QSYMM16) -> S32 + * + * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. + * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. + * @param[out] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. 
+ * @param[in] scale Scale to apply after multiplication. + * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. + * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate + * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, float scale, + ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Initialise the kernel's input, output and border mode. + * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,U8) -> S16 + * - (S16,S16) -> S16 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * - (QASYMM8,QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (QSYMM16,QSYMM16) -> QSYMM16 + * - (QSYMM16,QSYMM16) -> S32 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. + * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. + * @param[out] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. + * @param[in] scale Scale to apply after multiplication. + * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. + * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate + * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, float scale, + ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplicationKernel + * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,U8) -> S16 + * - (S16,S16) -> S16 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * - (QASYMM8,QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (QSYMM16,QSYMM16) -> QSYMM16 + * - (QSYMM16,QSYMM16) -> S32 + * + * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. + * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. + * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. + * @param[in] scale Scale to apply after multiplication. + * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. + * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate + * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
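+ *
+ * Example (illustrative sketch added for this documentation, not part of the original header): validating an
+ * (F32,F32) -> F32 multiplication with unit scale; the shapes and policies below are example values only.
+ * @code
+ * const TensorInfo in1(TensorShape(32U, 32U), 1, DataType::F32);
+ * const TensorInfo in2(TensorShape(32U, 32U), 1, DataType::F32);
+ * const TensorInfo out(TensorShape(32U, 32U), 1, DataType::F32);
+ * const Status     status = CLPixelWiseMultiplicationKernel::validate(&in1, &in2, &out, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
+ * @endcode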
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, + ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ITensorInfo *_input1; + const ITensorInfo *_input2; + ITensorInfo *_output; +}; + +/** Interface for the complex pixelwise multiplication kernel. */ +class CLComplexPixelWiseMultiplicationKernel : public ICLKernel +{ +public: + /** Default constructor.*/ + CLComplexPixelWiseMultiplicationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLComplexPixelWiseMultiplicationKernel(const CLComplexPixelWiseMultiplicationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLComplexPixelWiseMultiplicationKernel &operator=(const CLComplexPixelWiseMultiplicationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLComplexPixelWiseMultiplicationKernel(CLComplexPixelWiseMultiplicationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLComplexPixelWiseMultiplicationKernel &operator=(CLComplexPixelWiseMultiplicationKernel &&) = default; + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2. + * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * @param[out] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2. + * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * @param[out] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref CLComplexPixelWiseMultiplicationKernel + * + * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2. + * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
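+ *
+ * Example (illustrative sketch added for this documentation, not part of the original header): complex values
+ * are modelled as 2-channel F32 tensors; the shapes below are example values only.
+ * @code
+ * const TensorInfo in1(TensorShape(16U, 16U), 2, DataType::F32);
+ * const TensorInfo in2(TensorShape(16U, 16U), 2, DataType::F32);
+ * const TensorInfo out(TensorShape(16U, 16U), 2, DataType::F32);
+ * const Status     status = CLComplexPixelWiseMultiplicationKernel::validate(&in1, &in2, &out);
+ * @endcode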
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ITensorInfo *_input1; + const ITensorInfo *_input2; + ITensorInfo *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H */ diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp index 0570887b91..905610c31f 100644 --- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp @@ -21,11 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h" +#include "src/core/CL/kernels/CLPoolingLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Helpers.h" @@ -34,6 +33,7 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/AccessWindowStatic.h" #include "src/core/CL/CLValidate.h" +#include "src/core/CL/ICLKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" @@ -176,7 +176,7 @@ std::tuple validate_and_configure_window(ITenso case DataLayout::NHWC: { // Initialize border size - border_size = BorderSize(); + border_size = BorderSize(); num_elems_processed_per_iteration = adjust_vec_size(4, output->dimension(0)); win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.h b/src/core/CL/kernels/CLPoolingLayerKernel.h new file mode 100644 index 0000000000..d88402a792 --- /dev/null +++ b/src/core/CL/kernels/CLPoolingLayerKernel.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H +#define ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include "arm_compute/core/Error.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the pooling layer kernel */ +class CLPoolingLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLPoolingLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPoolingLayerKernel(const CLPoolingLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPoolingLayerKernel &operator=(const CLPoolingLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLPoolingLayerKernel(CLPoolingLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLPoolingLayerKernel &operator=(CLPoolingLayerKernel &&) = default; + /** Default destructor */ + ~CLPoolingLayerKernel() = default; + + /** Set the input and output tensors. + * + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32. + */ + void configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr); + /** Set the input and output tensors. + * + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr); + /** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayerKernel + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] output Destination tensor info. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32. 
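+ *
+ * Example (illustrative sketch added for this documentation, not part of the original header): validating a
+ * 2x2 max pooling with stride 2 on a 32x32x4 F32 tensor; the PoolingLayerInfo constructor arguments shown
+ * here are assumptions of this example.
+ * @code
+ * const TensorInfo src(TensorShape(32U, 32U, 4U), 1, DataType::F32);
+ * const TensorInfo dst(TensorShape(16U, 16U, 4U), 1, DataType::F32);
+ * const Status     status = CLPoolingLayerKernel::validate(&src, &dst, PoolingLayerInfo(PoolingType::MAX, 2, DataLayout::NCHW, PadStrideInfo(2, 2, 0, 0)));
+ * @endcode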
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +public: + const ICLTensor *_input; + ICLTensor *_output; + ICLTensor *_indices; + PoolingLayerInfo _pool_info; + DataLayout _data_layout; + BorderSize _border_size; + unsigned int _num_elems_processed_per_iteration; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp index 202e9fbb37..7b9caf0063 100644 --- a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp +++ b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h" +#include "src/core/CL/kernels/CLPriorBoxLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLPriorBoxLayerKernel.h b/src/core/CL/kernels/CLPriorBoxLayerKernel.h new file mode 100644 index 0000000000..6c369a7a4e --- /dev/null +++ b/src/core/CL/kernels/CLPriorBoxLayerKernel.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H +#define ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the PriorBox layer kernel. */ +class CLPriorBoxLayerKernel : public ICLKernel +{ +public: + /** Constructor */ + CLPriorBoxLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPriorBoxLayerKernel(const CLPriorBoxLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPriorBoxLayerKernel &operator=(const CLPriorBoxLayerKernel &) = delete; + /** Default Move Constructor. 
*/ + CLPriorBoxLayerKernel(CLPriorBoxLayerKernel &&) = default; + /** Default move assignment operator */ + CLPriorBoxLayerKernel &operator=(CLPriorBoxLayerKernel &&) = default; + /** Default destructor */ + ~CLPriorBoxLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC. + * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1 + * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data types and layouts supported: same as @p input1 + * @param[in] info Prior box layer info. + * @param[in] min Minimum prior box values + * @param[in] max Maximum prior box values + * @param[in] aspect_ratios Aspect ratio values + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max, cl::Buffer *aspect_ratios); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC. + * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1 + * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data types and layouts supported: same as @p input1 + * @param[in] info Prior box layer info. + * @param[in] min Minimum prior box values + * @param[in] max Maximum prior box values + * @param[in] aspect_ratios Aspect ratio values + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max, + cl::Buffer *aspect_ratios); + /** Static function to check if given info will lead to a valid configuration of @ref CLPriorBoxLayerKernel + * + * @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC. + * @param[in] input2 Second source tensor info. Data types and layouts supported: same as @p input1 + * @param[in] output Destination tensor info. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input1 + * @param[in] info Prior box layer info. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; + const ICLTensor *_input2; + ICLTensor *_output; + PriorBoxLayerInfo _info; + int _num_priors; + cl::Buffer *_min; + cl::Buffer *_max; + cl::Buffer *_aspect_ratios; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp index ff6cc86103..3a66d084b9 100644 --- a/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp +++ b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h" +#include "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "src/core/helpers/AutoConfiguration.h" diff --git a/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h new file mode 100644 index 0000000000..31085c37ba --- /dev/null +++ b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLQLSTMLAYERVNORMALIZATIONKERNEL_H +#define ARM_COMPUTE_CLQLSTMLAYERVNORMALIZATIONKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to do layer normalization. */ +class CLQLSTMLayerNormalizationKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLQLSTMLayerNormalizationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLQLSTMLayerNormalizationKernel(const CLQLSTMLayerNormalizationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLQLSTMLayerNormalizationKernel &operator=(const CLQLSTMLayerNormalizationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLQLSTMLayerNormalizationKernel(CLQLSTMLayerNormalizationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLQLSTMLayerNormalizationKernel &operator=(CLQLSTMLayerNormalizationKernel &&) = default; + /** Default destructor */ + ~CLQLSTMLayerNormalizationKernel() = default; + /** Initialise the kernel's input and outputs. + * + * @param[in] input Source tensor with 2 dimensions. Data types supported: QSYMM16. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] weight Weight tensor. Data types supported: Same as @p input. + * @param[in] bias Bias tensor. Data types supported: S32. + * + */ + void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *weight, const ICLTensor *bias); + /** Initialise the kernel's input and outputs. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor with 2 dimensions. Data types supported: QSYMM16. 
+ * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] weight Weight tensor. Data types supported: Same as @p input. + * @param[in] bias Bias tensor. Data types supported: S32. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *weight, const ICLTensor *bias); + /** Static function to check if given info will lead to a valid configuration of @ref CLQLSTMLayerNormalizationKernel + * + * @param[in] input Source tensor info with 2 dimensions. Data types supported: QSYMM16. + * @param[in] output Destination info tensor. Data type supported: same as @p input + * @param[in] weight Weight info tensor. Data types supported: Same as @p input. + * @param[in] bias Bias tensor info. Data types supported: S32. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_weight; + const ICLTensor *_bias; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLQLSTMLAYERVNORMALIZATIONKERNEL_H */ diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp index 44889b9407..76e703f0dd 100644 --- a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h" +#include "src/core/CL/kernels/CLQuantizationLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.h b/src/core/CL/kernels/CLQuantizationLayerKernel.h new file mode 100644 index 0000000000..e9d03decb3 --- /dev/null +++ b/src/core/CL/kernels/CLQuantizationLayerKernel.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H +#define ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the quantization layer kernel. 
+ * + * @note The implementation supports only 3D input tensors. + */ +class CLQuantizationLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLQuantizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLQuantizationLayerKernel(const CLQuantizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLQuantizationLayerKernel &operator=(const CLQuantizationLayerKernel &) = delete; + /** Default Move Constructor. */ + CLQuantizationLayerKernel(CLQuantizationLayerKernel &&) = default; + /** Default move assignment operator */ + CLQuantizationLayerKernel &operator=(CLQuantizationLayerKernel &&) = default; + /** Default destructor */ + ~CLQuantizationLayerKernel() = default; + /** Set the input, output. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. + * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. + * + * @note Output auto initialization is not supported by this kernel + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Set the input, output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. + * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. + * + * @note Output auto initialization is not supported by this kernel + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLQuantizationLayerKernel + * + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. + * @param[in] output Destination tensor info with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp index ca6c6fad1a..38eafc6e97 100644 --- a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp +++ b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h" +#include "src/core/CL/kernels/CLROIAlignLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLROIAlignLayerKernel.h b/src/core/CL/kernels/CLROIAlignLayerKernel.h new file mode 100644 index 0000000000..cbf0e00165 --- /dev/null +++ b/src/core/CL/kernels/CLROIAlignLayerKernel.h @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLROIALIGNLAYERKERNEL_H +#define ARM_COMPUTE_CLROIALIGNLAYERKERNEL_H + +#include "arm_compute/core/CL/ICLArray.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the RoIAlign kernel. + */ +class CLROIAlignLayerKernel : public ICLKernel +{ +public: + /** Constructor */ + CLROIAlignLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLROIAlignLayerKernel(const CLROIAlignLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLROIAlignLayerKernel &operator=(const CLROIAlignLayerKernel &) = delete; + /** Default Move Constructor. */ + CLROIAlignLayerKernel(CLROIAlignLayerKernel &&) = default; + /** Default move assignment operator. */ + CLROIAlignLayerKernel &operator=(CLROIAlignLayerKernel &&) = default; + /** Default destructor */ + ~CLROIAlignLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner + * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. + * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. + */ + void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. 
+ * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner + * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. + * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLROIAlignLayerKernel + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, + * otherwise same as @p input + * @param[in] output Destination tensor info. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. + * + * @return a Status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue); + +private: + const ICLTensor *_input; + ICLTensor *_output; + const ICLTensor *_rois; + ROIPoolingLayerInfo _pool_info; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLROIALIGNLAYERKERNEL_H*/ diff --git a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp index 55fe5a5321..43492a3d50 100644 --- a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h" +#include "src/core/CL/kernels/CLROIPoolingLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLROIPoolingLayerKernel.h b/src/core/CL/kernels/CLROIPoolingLayerKernel.h new file mode 100644 index 0000000000..35f42a9676 --- /dev/null +++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H +#define ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include "arm_compute/core/CL/ICLArray.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the ROI pooling layer kernel */ +class CLROIPoolingLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLROIPoolingLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLROIPoolingLayerKernel(const CLROIPoolingLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLROIPoolingLayerKernel &operator=(const CLROIPoolingLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLROIPoolingLayerKernel(CLROIPoolingLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLROIPoolingLayerKernel &operator=(CLROIPoolingLayerKernel &&) = default; + /** Default destructor */ + ~CLROIPoolingLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16/F32. + * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner + * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16 + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. + */ + void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: F16/F32. 
+ * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner + * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16 + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_rois; + ICLTensor *_output; + ROIPoolingLayerInfo _pool_info; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLRangeKernel.cpp b/src/core/CL/kernels/CLRangeKernel.cpp index a4c30b63c2..892f1c7c9f 100644 --- a/src/core/CL/kernels/CLRangeKernel.cpp +++ b/src/core/CL/kernels/CLRangeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLRangeKernel.h" +#include "src/core/CL/kernels/CLRangeKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLRangeKernel.h b/src/core/CL/kernels/CLRangeKernel.h new file mode 100644 index 0000000000..1b94a099ed --- /dev/null +++ b/src/core/CL/kernels/CLRangeKernel.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLRANGEKERNEL_H +#define ARM_COMPUTE_CLRANGEKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Kernel class for Range + * + * range generates a 1-D tensor containing a sequence of numbers that begins at 'start' and extends by increments + * of 'step' up to but not including 'end'. 
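[Editor's note, not part of the patch] The CLRangeKernel interface declared just above uses half-open sequence semantics: for example, start=0, end=10, step=2 produces the five values 0, 2, 4, 6, 8, with 'end' itself excluded. Below is a minimal usage sketch under stated assumptions: it relies on the library's public CLTensor/CLScheduler runtime API and a hypothetical 5-element F32 output tensor, and the kernel header is internal to src/ after this patch, so the last include only resolves when building inside the library tree.

// Illustrative sketch only; not part of the patch.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLRangeKernel.h" // internal path introduced by this patch

using namespace arm_compute;

void range_kernel_sketch()
{
    // Create the default OpenCL context and queue.
    CLScheduler::get().default_init();

    // start=0, end=10, step=2 -> {0, 2, 4, 6, 8}: five elements, 'end' excluded.
    CLTensor output;
    output.allocator()->init(TensorInfo(TensorShape(5U), 1, DataType::F32));

    CLRangeKernel range;
    range.configure(&output, 0.0f, 10.0f, 2.0f);

    output.allocator()->allocate();
    CLScheduler::get().enqueue(range);
    CLScheduler::get().sync();
}

The configure() overload declared alongside it takes the same arguments preceded by a CLCompileContext, and the static validate() accepts the output ITensorInfo plus the same start/end/step values for a pre-flight check.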
+ */ +class CLRangeKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLRangeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLRangeKernel(const CLRangeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLRangeKernel &operator=(const CLRangeKernel &) = delete; + /** Allow instances of this class to be moved */ + CLRangeKernel(CLRangeKernel &&) = default; + /** Allow instances of this class to be moved */ + CLRangeKernel &operator=(CLRangeKernel &&) = default; + /** Default destructor */ + ~CLRangeKernel() = default; + /** Initialize the kernel's output tensor, start, end and step of the sequence. + * + * @param[out] output Output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] start The starting value of the sequence. + * @param[in] end The ending (not including) value of the sequence. + * @param[in] step The gap between each pair of values in the sequence. + */ + void configure(ICLTensor *output, float start, float end, float step); + /** Initialize the kernel's output tensor, start, end and step of the sequence. + * + * @param[in] compile_context The compile context to be used. + * @param[out] output Output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] start The starting value of the sequence. + * @param[in] end The ending (not including) value of the sequence. + * @param[in] step The gap between each pair of values in the sequence. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *output, float start, float end, float step); + /** Static function to check if given info will lead to a valid configuration of @ref CLRangeKernel + * + * @param[in] output Output tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] start The starting value of the sequence. + * @param[in] end The ending (not including) value of the sequence. + * @param[in] step The gap between each pair of values in the sequence. + * + * @return a status + */ + static Status validate(const ITensorInfo *output, float start, float end, float step); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + float _start; /**< Start of sequence */ + float _end; /**< End of sequence */ + float _step; /**< Increment/step value */ + ICLTensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLRANGEKERNEL_H */ diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp index 325e4b994c..9d49a2193a 100644 --- a/src/core/CL/kernels/CLReductionOperationKernel.cpp +++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h" +#include "src/core/CL/kernels/CLReductionOperationKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLReductionOperationKernel.h b/src/core/CL/kernels/CLReductionOperationKernel.h new file mode 100644 index 0000000000..ff9fd61484 --- /dev/null +++ b/src/core/CL/kernels/CLReductionOperationKernel.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H +#define ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the reduction operation kernel + */ +class CLReductionOperationKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLReductionOperationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLReductionOperationKernel(const CLReductionOperationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLReductionOperationKernel &operator=(const CLReductionOperationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLReductionOperationKernel(CLReductionOperationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLReductionOperationKernel &operator=(CLReductionOperationKernel &&) = default; + /** Default destructor */ + ~CLReductionOperationKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 + * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX + * @param[in] width (Optional) In case of x-axis we also need to provide the width of the input image. + */ + void configure(const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width = 0); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 + * @param[in] op Reduction operation to perform. 
Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX + * @param[in] width (Optional) In case of x-axis we also need to provide the width of the input image. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width = 0); + + /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperationKernel. + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. + * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input. + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 + * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX + * @param[in] width (Optional) In case of x-axis we also need to provide the width of the input image. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, unsigned int width = 0); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + unsigned int _reduction_axis; + ReductionOperation _op; + BorderSize _border_size; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H */ diff --git a/src/core/CL/kernels/CLRemapKernel.cpp b/src/core/CL/kernels/CLRemapKernel.cpp index 8d3f41b35f..0ebeefcc74 100644 --- a/src/core/CL/kernels/CLRemapKernel.cpp +++ b/src/core/CL/kernels/CLRemapKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLRemapKernel.h" +#include "src/core/CL/kernels/CLRemapKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLRemapKernel.h b/src/core/CL/kernels/CLRemapKernel.h new file mode 100644 index 0000000000..8efcf091ed --- /dev/null +++ b/src/core/CL/kernels/CLRemapKernel.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLREMAPKERNEL_H +#define ARM_COMPUTE_CLREMAPKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform a remap on a tensor */ +class CLRemapKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLRemapKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLRemapKernel(const CLRemapKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLRemapKernel &operator=(const CLRemapKernel &) = delete; + /** Allow instances of this class to be moved */ + CLRemapKernel(CLRemapKernel &&) = default; + /** Allow instances of this class to be moved */ + CLRemapKernel &operator=(CLRemapKernel &&) = default; + /** Initialize the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] map_x Map for X coordinates. Data types supported: F32. + * @param[in] map_y Map for Y coordinates. Data types supported: F32. + * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. + * @param[in] policy The interpolation type. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined); + /** Initialize the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] map_x Map for X coordinates. Data types supported: F32. + * @param[in] map_y Map for Y coordinates. Data types supported: F32. + * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. + * @param[in] policy The interpolation type. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + const ICLTensor *_map_x; + const ICLTensor *_map_y; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLREMAPKERNEL_H */ diff --git a/src/core/CL/kernels/CLReorgLayerKernel.cpp b/src/core/CL/kernels/CLReorgLayerKernel.cpp index ade7761b91..662c790ca2 100644 --- a/src/core/CL/kernels/CLReorgLayerKernel.cpp +++ b/src/core/CL/kernels/CLReorgLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLReorgLayerKernel.h" +#include "src/core/CL/kernels/CLReorgLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLReorgLayerKernel.h b/src/core/CL/kernels/CLReorgLayerKernel.h new file mode 100644 index 0000000000..455a6170c6 --- /dev/null +++ b/src/core/CL/kernels/CLReorgLayerKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLREORGLAYERKERNEL_H +#define ARM_COMPUTE_CLREORGLAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform a reorg layer */ +class CLReorgLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLReorgLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLReorgLayerKernel(const CLReorgLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLReorgLayerKernel &operator=(const CLReorgLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLReorgLayerKernel(CLReorgLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLReorgLayerKernel &operator=(CLReorgLayerKernel &&) = default; + /** Initialize the kernel's input, output. + * + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Destination tensor with tensor shape: + * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has + * the same number of input elements. Data types supported: same as @p input. + * @param[in] stride Stride value to use for reorganizing the values in the output tensor. + * It defines the spatial distance between 2 consecutive pixels in the x and y direction + */ + void configure(const ICLTensor *input, ICLTensor *output, int32_t stride); + /** Initialize the kernel's input, output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Destination tensor with tensor shape: + * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has + * the same number of input elements. 
Data types supported: same as @p input. + * @param[in] stride Stride value to use for reorganizing the values in the output tensor. + * It defines the spatial distance between 2 consecutive pixels in the x and y direction + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t stride); + /** Static function to check if given info will lead to a valid configuration of @ref CLReorgLayerKernel + * + * @param[in] input Source tensor. Data types supported: All. + * @param[in] output Destination tensor with tensor shape: + * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has + * the same number of input elements. Data types supported: same as @p input. Data types supported: same as @p input. + * @param[in] stride Stride value to use for reorganizing the values in the output tensor + * It defines the spatial distance between 2 consecutive pixels in the x and y direction + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t stride); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLREORGLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLReshapeLayerKernel.cpp b/src/core/CL/kernels/CLReshapeLayerKernel.cpp index b14013bc34..58d7843624 100644 --- a/src/core/CL/kernels/CLReshapeLayerKernel.cpp +++ b/src/core/CL/kernels/CLReshapeLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h" +#include "src/core/CL/kernels/CLReshapeLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLReshapeLayerKernel.h b/src/core/CL/kernels/CLReshapeLayerKernel.h new file mode 100644 index 0000000000..902c44649b --- /dev/null +++ b/src/core/CL/kernels/CLReshapeLayerKernel.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLRESHAPELAYERKERNEL_H +#define ARM_COMPUTE_CLRESHAPELAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to perform tensor reshaping */ +class CLReshapeLayerKernel : public ICLKernel +{ +public: + /** Set the input and output of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor info. Data type supported: All. + * @param[out] output Destination tensor info. Data type supported: Same as @p input + */ + void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output); + + /** Static function to check if given info will lead to a valid configuration of @ref CLReshapeLayerKernel + * + * @param[in] input Source tensor info. Data type supported: All + * @param[in] output Destination tensor info. Data type supported: Same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLRESHAPELAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLReverseKernel.cpp b/src/core/CL/kernels/CLReverseKernel.cpp index f8240984d1..9a876258e9 100644 --- a/src/core/CL/kernels/CLReverseKernel.cpp +++ b/src/core/CL/kernels/CLReverseKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLReverseKernel.h" +#include "src/core/CL/kernels/CLReverseKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLReverseKernel.h b/src/core/CL/kernels/CLReverseKernel.h new file mode 100644 index 0000000000..4a21e4f802 --- /dev/null +++ b/src/core/CL/kernels/CLReverseKernel.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLREVERSEKERNEL_H +#define ARM_COMPUTE_CLREVERSEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the reverse kernel */ +class CLReverseKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLReverseKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLReverseKernel(const CLReverseKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLReverseKernel &operator=(const CLReverseKernel &) = delete; + /** Allow instances of this class to be moved */ + CLReverseKernel(CLReverseKernel &&) = default; + /** Allow instances of this class to be moved */ + CLReverseKernel &operator=(CLReverseKernel &&) = default; + /** Default destructor */ + ~CLReverseKernel() = default; + /** Initialise the kernel's inputis and output + * + * @param[in] input Input tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: Same as @p input + * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 + */ + void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *axis); + /** Initialise the kernel's inputis and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: Same as @p input + * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *axis); + + /** Static function to check if given info will lead to a valid configuration of @ref CLReverseKernel + * + * @param[in] input Input tensor info. Data types supported: All. + * @param[in] output Output tensor info. Data type supported: Same as @p input + * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32 + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +public: + const ICLTensor *_input; + ICLTensor *_output; + const ICLTensor *_axis; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLREVERSEKERNEL_H */ diff --git a/src/core/CL/kernels/CLScaleKernel.cpp b/src/core/CL/kernels/CLScaleKernel.cpp index 8233f210b4..5a7d5830fd 100644 --- a/src/core/CL/kernels/CLScaleKernel.cpp +++ b/src/core/CL/kernels/CLScaleKernel.cpp @@ -21,11 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLScaleKernel.h" +#include "src/core/CL/kernels/CLScaleKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Error.h" @@ -33,6 +32,7 @@ #include "arm_compute/core/TensorInfo.h" #include "src/core/AccessWindowStatic.h" #include "src/core/CL/CLValidate.h" +#include "src/core/CL/ICLKernel.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLScaleKernel.h b/src/core/CL/kernels/CLScaleKernel.h new file mode 100644 index 0000000000..a72e3938d9 --- /dev/null +++ b/src/core/CL/kernels/CLScaleKernel.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLSCALEKERNEL_H +#define ARM_COMPUTE_CLSCALEKERNEL_H + +#include "arm_compute/core/KernelDescriptors.h" +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the scale kernel */ +class CLScaleKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's inputs, output and interpolation policy + * + * @param[in] input Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32 + * @param[out] output Destination tensor. Data types supported: Same as @p input + * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] info @ref ScaleKernelInfo Kernel descriptor to be used to configure. + */ + void configure(const ICLTensor *input, ICLTensor *output, const ScaleKernelInfo &info); + /** Initialise the kernel's inputs, output and interpolation policy + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32 + * @param[out] output Destination tensor. Data types supported: Same as @p input + * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] info @ref ScaleKernelInfo Kernel descriptor to be used to configure. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ScaleKernelInfo &info); + + /** Static function to check if given info will lead to a valid configuration of @ref CLScaleKernel + * + * @param[in] input Source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32 + * @param[in] output Destination tensor info. Data types supported: Same as @p input + * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] info @ref ScaleKernelInfo Kernel descriptor to be used to validate + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ScaleKernelInfo &info); + /** Input tensor accessor. + * + * @return Pointer to input tensor. + */ + const ICLTensor *input() const; + /** Output tensor accessor. + * + * @return Pointer to output tensor. + */ + const ICLTensor *output() const; + + // Inherited methods overridden: + BorderSize border_size() const override; + void run(const Window &window, cl::CommandQueue &queue) override; + + // Getter for interpolation policy + InterpolationPolicy get_interpolation_policy() const + { + return _interpolation_policy; + } + +private: + InterpolationPolicy _interpolation_policy = InterpolationPolicy::BILINEAR; + DataLayout _data_layout = DataLayout::UNKNOWN; + bool _align_corners = false; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLSCALEKERNEL_H */ diff --git a/src/core/CL/kernels/CLScharr3x3Kernel.cpp b/src/core/CL/kernels/CLScharr3x3Kernel.cpp index 1e33af3047..7ceddc9626 100644 --- a/src/core/CL/kernels/CLScharr3x3Kernel.cpp +++ b/src/core/CL/kernels/CLScharr3x3Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLScharr3x3Kernel.h" +#include "src/core/CL/kernels/CLScharr3x3Kernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLScharr3x3Kernel.h b/src/core/CL/kernels/CLScharr3x3Kernel.h new file mode 100644 index 0000000000..a670da5b6f --- /dev/null +++ b/src/core/CL/kernels/CLScharr3x3Kernel.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLSCHARR3X3KERNEL_H +#define ARM_COMPUTE_CLSCHARR3X3KERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run a 3x3 Scharr filter on a tensor. + * + * @f[ + * \mathbf{G}_x=\begin{vmatrix} + * -3 & 0 & +3\\ + * -10& 0 & +10\\ + * -3 & 0 & +3 + * \end{vmatrix} + * @f] + * @f[ + * \mathbf{G}_y=\begin{vmatrix} + * -3 & -10 & -3\\ + * 0 & 0 & 0\\ + * +3 & +10 & +3 + * \end{vmatrix} + * @f] + */ +class CLScharr3x3Kernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ + CLScharr3x3Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLScharr3x3Kernel(const CLScharr3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLScharr3x3Kernel &operator=(const CLScharr3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + CLScharr3x3Kernel(CLScharr3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + CLScharr3x3Kernel &operator=(CLScharr3x3Kernel &&) = default; + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + bool _run_scharr_x; /**< Do we need to run Scharr X ? */ + bool _run_scharr_y; /**< Do we need to run Scharr Y ? */ + const ICLTensor *_input; /**< Input image */ + ICLTensor *_output_x; /**< Output image for scharr X */ + ICLTensor *_output_y; /**< Output image for scharr Y */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLSCHARR3X3KERNEL_H */ diff --git a/src/core/CL/kernels/CLSelectKernel.cpp b/src/core/CL/kernels/CLSelectKernel.cpp index d9a1044e1f..53e5414c88 100644 --- a/src/core/CL/kernels/CLSelectKernel.cpp +++ b/src/core/CL/kernels/CLSelectKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
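As a purely illustrative sketch of the Scharr kernel API added above, the snippet below requests both gradient outputs with an undefined border. The shapes and the example_scharr3x3 helper are assumptions; if border_undefined were false, the border would have to be filled first (for example with CLFillBorderKernel).

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLScharr3x3Kernel.h"

using namespace arm_compute;

void example_scharr3x3() // hypothetical helper
{
    CLScheduler::get().default_init();

    CLTensor src, gx, gy;
    src.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));
    gx.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::S16));
    gy.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::S16));
    src.allocator()->allocate();
    gx.allocator()->allocate();
    gy.allocator()->allocate();

    CLScharr3x3Kernel scharr;
    scharr.configure(&src, &gx, &gy, /* border_undefined */ true); // border pixels are left undefined
    CLScheduler::get().enqueue(scharr);
    CLScheduler::get().sync();
}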
*/ -#include "arm_compute/core/CL/kernels/CLSelectKernel.h" +#include "src/core/CL/kernels/CLSelectKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLSelectKernel.h b/src/core/CL/kernels/CLSelectKernel.h new file mode 100644 index 0000000000..93ae27f444 --- /dev/null +++ b/src/core/CL/kernels/CLSelectKernel.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLSELECTKERNEL_H +#define ARM_COMPUTE_CLSELECTKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ICLTensor; + +/** OpenCL interface for executing the select kernel + * + * Select is computed by: + * @f[ output(i) = condition(i) ? x(i) : y(i) @f] + **/ +class CLSelectKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLSelectKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSelectKernel(const CLSelectKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSelectKernel &operator=(const CLSelectKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSelectKernel(CLSelectKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSelectKernel &operator=(CLSelectKernel &&) = default; + /** Default destructor */ + ~CLSelectKernel() = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] c Condition input tensor. Data types supported: U8. + * @param[in] x First input tensor. Data types supported: All. + * @param[out] y Second input tensor. Data types supported: Same as @p x + * @param[in] output Output tensor. Data types supported: Same as @p x. + */ + void configure(const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output); + /** Initialise the kernel's inputs and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] c Condition input tensor. Data types supported: U8. + * @param[in] x First input tensor. Data types supported: All. + * @param[out] y Second input tensor. Data types supported: Same as @p x + * @param[in] output Output tensor. Data types supported: Same as @p x. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLSelectKernel + * + * @param[in] c Condition input tensor. Data types supported: U8. + * @param[in] x First input tensor. Data types supported: All. + * @param[in] y Second input tensor. Data types supported: Same as @p x + * @param[in] output Output tensor. Data types supported: Same as @p x. + * + * @return a status + */ + static Status validate(const ITensorInfo *c, const ITensorInfo *x, const ITensorInfo *y, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_c; /**< Condition tensor */ + const ICLTensor *_x; /**< Source tensor 1 */ + const ICLTensor *_y; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ + bool _has_same_rank; /**< Flag that indicates if condition tensor and other inputs have the same rank */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLWHEREKERNEL_H */ diff --git a/src/core/CL/kernels/CLSobel3x3Kernel.cpp b/src/core/CL/kernels/CLSobel3x3Kernel.cpp index 89e5207c44..a87677a38f 100644 --- a/src/core/CL/kernels/CLSobel3x3Kernel.cpp +++ b/src/core/CL/kernels/CLSobel3x3Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLSobel3x3Kernel.h" +#include "src/core/CL/kernels/CLSobel3x3Kernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLSobel3x3Kernel.h b/src/core/CL/kernels/CLSobel3x3Kernel.h new file mode 100644 index 0000000000..fed8068762 --- /dev/null +++ b/src/core/CL/kernels/CLSobel3x3Kernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLSOBEL3X3KERNEL_H +#define ARM_COMPUTE_CLSOBEL3X3KERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run a 3x3 Sobel filter on a tensor. */ +class CLSobel3x3Kernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. 
*/ + CLSobel3x3Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSobel3x3Kernel(const CLSobel3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSobel3x3Kernel &operator=(const CLSobel3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + CLSobel3x3Kernel(CLSobel3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + CLSobel3x3Kernel &operator=(CLSobel3x3Kernel &&) = default; + /** Default destructor */ + ~CLSobel3x3Kernel() = default; + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; /**< Input tensor */ + ICLTensor *_output_x; /**< Output tensor for Sobel X */ + ICLTensor *_output_y; /**< Output tensor for Sobel Y */ + bool _run_sobel_x; /**< Do we need to run Sobel X ? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y ? */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLSOBEL3X3KERNEL_H */ diff --git a/src/core/CL/kernels/CLSobel5x5Kernel.cpp b/src/core/CL/kernels/CLSobel5x5Kernel.cpp index 3e765e47fb..c450becd1d 100644 --- a/src/core/CL/kernels/CLSobel5x5Kernel.cpp +++ b/src/core/CL/kernels/CLSobel5x5Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h" +#include "src/core/CL/kernels/CLSobel5x5Kernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLSobel5x5Kernel.h b/src/core/CL/kernels/CLSobel5x5Kernel.h new file mode 100644 index 0000000000..a163ac932a --- /dev/null +++ b/src/core/CL/kernels/CLSobel5x5Kernel.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
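For the 3x3 Sobel kernel declared above, the sketch below requests only the X gradient, which is allowed because at least one of the two outputs must be set. The shapes and the example_sobel3x3 helper are assumptions for illustration.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLSobel3x3Kernel.h"

using namespace arm_compute;

void example_sobel3x3() // hypothetical helper
{
    CLScheduler::get().default_init();

    CLTensor src, gx;
    src.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));
    gx.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::S16));
    src.allocator()->allocate();
    gx.allocator()->allocate();

    CLSobel3x3Kernel sobel;
    sobel.configure(&src, &gx, nullptr, /* border_undefined */ true); // only the X gradient is requested
    CLScheduler::get().enqueue(sobel);
    CLScheduler::get().sync();
}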
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLSOBEL5X5KERNEL_H +#define ARM_COMPUTE_CLSOBEL5X5KERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor. */ +class CLSobel5x5HorKernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ + CLSobel5x5HorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSobel5x5HorKernel(const CLSobel5x5HorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSobel5x5HorKernel &operator=(const CLSobel5x5HorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSobel5x5HorKernel(CLSobel5x5HorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSobel5x5HorKernel &operator=(CLSobel5x5HorKernel &&) = default; + /** Default destructor */ + ~CLSobel5x5HorKernel() = default; + + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; /**< Input tensor */ + ICLTensor *_output_x; /**< X output of horizontal pass */ + ICLTensor *_output_y; /**< Y output of horizontal pass */ + bool _run_sobel_x; /**< Do we need to run Sobel X ? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y ? */ + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel to run the vertical pass of 5x5 Sobel filter on a tensor. */ +class CLSobel5x5VertKernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ + CLSobel5x5VertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSobel5x5VertKernel(const CLSobel5x5VertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSobel5x5VertKernel &operator=(const CLSobel5x5VertKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSobel5x5VertKernel(CLSobel5x5VertKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSobel5x5VertKernel &operator=(CLSobel5x5VertKernel &&) = default; + /** Default destructor */ + ~CLSobel5x5VertKernel() = default; + + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set and the corresponding input. + * + * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16. + * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set and the corresponding input. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16. + * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */ + const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */ + ICLTensor *_output_x; /**< X output of sobel */ + ICLTensor *_output_y; /**< Y output of sobel */ + bool _run_sobel_x; /**< Do we need to run sobel X? */ + bool _run_sobel_y; /**< Do we need to run sobel Y? */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLSOBEL5X5KERNEL_H */ diff --git a/src/core/CL/kernels/CLSobel7x7Kernel.cpp b/src/core/CL/kernels/CLSobel7x7Kernel.cpp index 37ceaba502..1cfa74f7b3 100644 --- a/src/core/CL/kernels/CLSobel7x7Kernel.cpp +++ b/src/core/CL/kernels/CLSobel7x7Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h" +#include "src/core/CL/kernels/CLSobel7x7Kernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLSobel7x7Kernel.h b/src/core/CL/kernels/CLSobel7x7Kernel.h new file mode 100644 index 0000000000..c85f0aedf9 --- /dev/null +++ b/src/core/CL/kernels/CLSobel7x7Kernel.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLSOBEL7X7KERNEL_H +#define ARM_COMPUTE_CLSOBEL7X7KERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor. */ +class CLSobel7x7HorKernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. 
*/ + CLSobel7x7HorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSobel7x7HorKernel(const CLSobel7x7HorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSobel7x7HorKernel &operator=(const CLSobel7x7HorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSobel7x7HorKernel(CLSobel7x7HorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSobel7x7HorKernel &operator=(CLSobel7x7HorKernel &&) = default; + /** Default destructor */ + ~CLSobel7x7HorKernel() = default; + + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; /**< Input tensor */ + ICLTensor *_output_x; /**< X output of horizontal pass */ + ICLTensor *_output_y; /**< Y output of horizontal pass */ + bool _run_sobel_x; /**< Do we need to run Sobel X ? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y ? */ + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel to run the vertical pass of 7x7 Sobel filter on a tensor. */ +class CLSobel7x7VertKernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ + CLSobel7x7VertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSobel7x7VertKernel(const CLSobel7x7VertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSobel7x7VertKernel &operator=(const CLSobel7x7VertKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSobel7x7VertKernel(CLSobel7x7VertKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSobel7x7VertKernel &operator=(CLSobel7x7VertKernel &&) = default; + /** Default destructor */ + ~CLSobel7x7VertKernel() = default; + + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set and the corresponding input. 
+ * + * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32. + * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set and the corresponding input. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32. + * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */ + const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */ + ICLTensor *_output_x; /**< X output of sobel */ + ICLTensor *_output_y; /**< Y output of sobel */ + bool _run_sobel_x; /**< Do we need to run sobel X? */ + bool _run_sobel_y; /**< Do we need to run sobel Y? */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLSOBEL7X7KERNEL_H */ diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp index 5c0acda41a..d9f498c522 100644 --- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h" +#include "src/core/CL/kernels/CLSoftmaxLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.h b/src/core/CL/kernels/CLSoftmaxLayerKernel.h new file mode 100644 index 0000000000..29e0f63e46 --- /dev/null +++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.h @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H +#define ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H + +#include "arm_compute/core/KernelDescriptors.h" +#include "src/core/CL/ICLSimple3DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for max, shifting, exponentiating and summing the logits */ +class CLLogits1DMaxShiftExpSumKernel : public ICLKernel +{ +public: + /** Info for whether a parallel reduction will be run and the vector size of the execution. */ + using ParallelReductionInfo = std::tuple<bool, unsigned int>; + +public: + /** Default constructor */ + CLLogits1DMaxShiftExpSumKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLogits1DMaxShiftExpSumKernel(const CLLogits1DMaxShiftExpSumKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLogits1DMaxShiftExpSumKernel &operator=(const CLLogits1DMaxShiftExpSumKernel &) = delete; + /** Allow instances of this class to be moved */ + CLLogits1DMaxShiftExpSumKernel(CLLogits1DMaxShiftExpSumKernel &&) = default; + /** Allow instances of this class to be moved */ + CLLogits1DMaxShiftExpSumKernel &operator=(CLLogits1DMaxShiftExpSumKernel &&) = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in,out] max Max values tensor. Data types supported: same as @p input + * @param[out] output Destination tensor. Data types supported: same as @p input + * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input + * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. + */ + void configure(const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in,out] max Max values tensor. Data types supported: same as @p input + * @param[out] output Destination tensor. Data types supported: same as @p input + * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input + * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
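In practice these softmax kernels are normally driven by the CLSoftmaxLayer runtime function; the sketch below shows that route together with a query of the parallel-reduction helper documented further down. The shapes, the example_softmax helper and the way the query result is discarded are assumptions for illustration.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h"
#include "src/core/CL/kernels/CLSoftmaxLayerKernel.h"

#include <tuple>

using namespace arm_compute;

void example_softmax() // hypothetical helper
{
    CLScheduler::get().default_init();

    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(1000U, 16U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(1000U, 16U), 1, DataType::F32));
    src.allocator()->allocate();
    dst.allocator()->allocate();

    CLSoftmaxLayer softmax;
    softmax.configure(&src, &dst); // internally configures the max/shift/exp/sum and norm kernels
    softmax.run();

    // Query whether a row of 1000 elements would take the parallel reduction path
    const auto reduction = CLLogits1DMaxShiftExpSumKernel::is_parallel_reduction(1000);
    const bool parallel  = std::get<0>(reduction);
    static_cast<void>(parallel); // illustration only
}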
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info); + /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxShiftExpSumKernel + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in] max Max values tensor. Data types supported: same as @p input + * @param[in] output Destination tensor. Data types supported: same as @p input + * @param[in] sum Sum of 1D logits tensor. Data types supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum); + /** Checks if the given size is eligible for parallel reduction + * + * @note Serial reduction is launched for width < (_grid_size * _serial_vector_size). + * @note Parallel reduction is launched for width >= (_grid_size * _serial_vector_size) and vector_size is forced to 4. + * + * @param[in] size Size to check + * + * @return A two-element tuple where the first element is a boolean specifying if a parallel reduction will be run, + * while the second element is the vector size of the execution. + */ + static ParallelReductionInfo is_parallel_reduction(size_t size); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_max; + ICLTensor *_output; + ICLTensor *_sum; + +private: + static const unsigned int _grid_size; + static const unsigned int _serial_vector_size; + static const unsigned int _parallel_vector_size; +}; +/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */ +class CLLogits1DNormKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLLogits1DNormKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLogits1DNormKernel(const CLLogits1DNormKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLogits1DNormKernel &operator=(const CLLogits1DNormKernel &) = delete; + /** Allow instances of this class to be moved */ + CLLogits1DNormKernel(CLLogits1DNormKernel &&) = default; + /** Allow instances of this class to be moved */ + CLLogits1DNormKernel &operator=(CLLogits1DNormKernel &&) = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported. + * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input + * @param[out] output Destination tensor. Data types supported: QASYMM8/QASYMM8_SIGNED for S32 @p input, or same as @p input + * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. + */ + void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported. + * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. 
Data types supported: same as @p input + * @param[out] output Destination tensor. Data types supported: QASYMM8/QASYMM8_SIGNED for S32 @p input, or same as @p input + * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info); + /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DNormKernel + * + * @param[in] input Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported. + * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input + * @param[in] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input + * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, const SoftmaxKernelInfo &info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_sum; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp index c6f70c3c09..91b889a10a 100644 --- a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp +++ b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h" +#include "src/core/CL/kernels/CLSpaceToBatchLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.h b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.h new file mode 100644 index 0000000000..4819c80fce --- /dev/null +++ b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H +#define ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the space to batch kernel */ +class CLSpaceToBatchLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLSpaceToBatchLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSpaceToBatchLayerKernel(const CLSpaceToBatchLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSpaceToBatchLayerKernel &operator=(const CLSpaceToBatchLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSpaceToBatchLayerKernel(CLSpaceToBatchLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSpaceToBatchLayerKernel &operator=(CLSpaceToBatchLayerKernel &&) = default; + /** Default destructor */ + ~CLSpaceToBatchLayerKernel() = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 + * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output); + /** Initialise the kernel's inputs and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 + * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output); + /** Initialise the kernel's input and output. (Static block shape and paddings) + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. + * @param[in] padding_left The left padding of the output tensor. + * @param[in] padding_right The right padding of the output tensor. + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output); + /** Initialise the kernel's input and output. (Static block shape and paddings) + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. + * @param[in] padding_left The left padding of the output tensor. + * @param[in] padding_right The right padding of the output tensor. + * @param[out] output Tensor output. 
Data types supported: same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, + ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayerKernel + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 + * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 + * @param[in] output Tensor output. Data types supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayerKernel (Static block shape and paddings) + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. + * @param[in] padding_left The left padding of the output tensor. + * @param[in] padding_right The right padding of the output tensor. + * @param[in] output Tensor output. Data types supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Source tensor */ + const ICLTensor *_block_shape; /**< Block shape tensor */ + const ICLTensor *_paddings; /**< Paddings tensor */ + ICLTensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp index 2d46aade34..1c648e0944 100644 --- a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp +++ b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h" +#include "src/core/CL/kernels/CLSpaceToDepthLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.h b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.h new file mode 100644 index 0000000000..bb1ac5f9a6 --- /dev/null +++ b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
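For the CLSpaceToBatchLayerKernel added above, the sketch below uses the static block-shape overload. The NCHW shapes, the zero paddings and the example_space_to_batch helper are assumptions chosen so that a 2x2 block turns an [8, 8, 2, 1] input into a [4, 4, 2, 4] output.

#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLSpaceToBatchLayerKernel.h"

using namespace arm_compute;

void example_space_to_batch() // hypothetical helper
{
    CLScheduler::get().default_init();

    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 2U, 1U), 1, DataType::F32)); // W, H, C, N
    dst.allocator()->init(TensorInfo(TensorShape(4U, 4U, 2U, 4U), 1, DataType::F32)); // batch grows by block_x * block_y
    src.allocator()->allocate();
    dst.allocator()->allocate();

    CLSpaceToBatchLayerKernel s2b;
    s2b.configure(&src, /* block_shape_x */ 2, /* block_shape_y */ 2, Size2D(0, 0), Size2D(0, 0), &dst);
    CLScheduler::get().enqueue(s2b);
    CLScheduler::get().sync();
}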
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H +#define ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the space to depth kernel */ +class CLSpaceToDepthLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLSpaceToDepthLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSpaceToDepthLayerKernel(const CLSpaceToDepthLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSpaceToDepthLayerKernel &operator=(const CLSpaceToDepthLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSpaceToDepthLayerKernel(CLSpaceToDepthLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSpaceToDepthLayerKernel &operator=(CLSpaceToDepthLayerKernel &&) = default; + /** Default destructor */ + ~CLSpaceToDepthLayerKernel() = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[out] output Tensor output. Data types supported: same as @p input + * @param[in] block_shape Block shape value. + */ + void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape); + /** Initialise the kernel's inputs and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[out] output Tensor output. Data types supported: same as @p input + * @param[in] block_shape Block shape value. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape); + /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToDepthLayerKernel. + * + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. + * @param[in] output Tensor output info. Data types supported: same as @p input + * @param[in] block_shape Block shape value. 
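A matching sketch for the space-to-depth kernel follows. The shapes and the example_space_to_depth helper are assumptions; a block shape of 2 moves each 2x2 spatial tile into the channel dimension.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLSpaceToDepthLayerKernel.h"

using namespace arm_compute;

void example_space_to_depth() // hypothetical helper
{
    CLScheduler::get().default_init();

    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 2U, 1U), 1, DataType::F32)); // W, H, C, N
    dst.allocator()->init(TensorInfo(TensorShape(4U, 4U, 8U, 1U), 1, DataType::F32)); // channels grow by block_shape^2
    src.allocator()->allocate();
    dst.allocator()->allocate();

    ARM_COMPUTE_ERROR_THROW_ON(CLSpaceToDepthLayerKernel::validate(src.info(), dst.info(), 2));

    CLSpaceToDepthLayerKernel s2d;
    s2d.configure(&src, &dst, 2);
    CLScheduler::get().enqueue(s2d);
    CLScheduler::get().sync();
}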
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Source tensor */ + ICLTensor *_output; /**< Destination tensor */ + int32_t _block_shape; /**< Block shape */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLStackLayerKernel.cpp b/src/core/CL/kernels/CLStackLayerKernel.cpp index 5055065779..9bdcc8dc3f 100644 --- a/src/core/CL/kernels/CLStackLayerKernel.cpp +++ b/src/core/CL/kernels/CLStackLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLStackLayerKernel.h" +#include "src/core/CL/kernels/CLStackLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLStackLayerKernel.h b/src/core/CL/kernels/CLStackLayerKernel.h new file mode 100644 index 0000000000..2865127a90 --- /dev/null +++ b/src/core/CL/kernels/CLStackLayerKernel.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef ARM_COMPUTE_CLSTACKLAYERKERNEL_H +#define ARM_COMPUTE_CLSTACKLAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to stack a rank-R tensor into one with rank-(R+1) along the axis dimension.*/ +class CLStackLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLStackLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLStackLayerKernel(const CLStackLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLStackLayerKernel &operator=(const CLStackLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLStackLayerKernel(CLStackLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLStackLayerKernel &operator=(CLStackLayerKernel &&) = default; + /** Default destructor */ + ~CLStackLayerKernel() = default; + /** Initialise the kernel's inputs and output + * + * @note Supported input tensor rank: up to 4 + * + * @param[in] input Input tensor. Data types supported: All. + * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. + * @param[in] idx_input Index of the input tensor in the list of tensors to stack. + * All tensors in the list must have the same shape + * @param[in] num_tensors Number of tensors to stack + * @param[out] output Output tensor. Data types supported: Same as @p input. + * + */ + void configure(const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output); + /** Initialise the kernel's inputs and output + * + * @note Supported input tensor rank: up to 4 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. + * @param[in] idx_input Index of the input tensor in the list of tensors to stack. + * All tensors in the list must have the same shape + * @param[in] num_tensors Number of tensors to stack + * @param[out] output Output tensor. Data types supported: Same as @p input. + * + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLStackLayerKernel + * + * @note Supported input tensor rank: up to 4 + * + * @param[in] input Input tensor info. Data types supported: All. + * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. + * @param[in] idx_input Index of the input tensor in the list of tensors to stack + * All tensors in the list must have the same shape + * @param[in] num_tensors Number of tensors to stack + * @param[in] output Output tensor info. Data types supported: Same as @p input.
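To illustrate how one kernel instance per input is configured against a shared output, the sketch below stacks two [32, 16] tensors along axis 0. The stacked output shape and the example_stack helper are assumptions made for this illustration.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLStackLayerKernel.h"

using namespace arm_compute;

void example_stack() // hypothetical helper
{
    CLScheduler::get().default_init();

    CLTensor in0, in1, out;
    in0.allocator()->init(TensorInfo(TensorShape(32U, 16U), 1, DataType::F32));
    in1.allocator()->init(TensorInfo(TensorShape(32U, 16U), 1, DataType::F32));
    out.allocator()->init(TensorInfo(TensorShape(2U, 32U, 16U), 1, DataType::F32)); // assumed stacked shape for axis 0
    in0.allocator()->allocate();
    in1.allocator()->allocate();
    out.allocator()->allocate();

    // One kernel per input tensor, all writing into the same output at their idx_input slot
    CLStackLayerKernel k0, k1;
    k0.configure(&in0, /* axis */ 0, /* idx_input */ 0, /* num_tensors */ 2, &out);
    k1.configure(&in1, 0, 1, 2, &out);
    CLScheduler::get().enqueue(k0, false);
    CLScheduler::get().enqueue(k1);
    CLScheduler::get().sync();
}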
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLSTACKLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLStridedSliceKernel.cpp b/src/core/CL/kernels/CLStridedSliceKernel.cpp index b632e05d84..c87fcb9765 100644 --- a/src/core/CL/kernels/CLStridedSliceKernel.cpp +++ b/src/core/CL/kernels/CLStridedSliceKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h" +#include "src/core/CL/kernels/CLStridedSliceKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/utils/helpers/tensor_transform.h" diff --git a/src/core/CL/kernels/CLStridedSliceKernel.h b/src/core/CL/kernels/CLStridedSliceKernel.h new file mode 100644 index 0000000000..599cf34c39 --- /dev/null +++ b/src/core/CL/kernels/CLStridedSliceKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H +#define ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +#include <cstdint> + +namespace arm_compute +{ +/** Interface for the kernel to perform tensor strided slicing */ +class CLStridedSliceKernel : public ICLKernel +{ +public: + /** Configure kernel + * + * @note Supported tensor rank: up to 4 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor info. Data type supported: All. + * @param[out] output Destination tensor info. Data type supported: Same as @p input + * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. + * A slice of size 1 starting from starts[i] in the dimension must be preserved. + */ + void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, + const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, + int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); + + /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel + * + * @note Supported tensor rank: up to 4 + * + * @param[in] input Source tensor. Data type supported: All. + * @param[in] output Destination tensor. Data type supported: Same as @p input + * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. + * A slice of size 1 starting from starts[i] in the dimension must be preserved. + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, + int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H */ diff --git a/src/core/CL/kernels/CLTableLookupKernel.cpp b/src/core/CL/kernels/CLTableLookupKernel.cpp index 3b8ca60ab1..b82f4c9889 100644 --- a/src/core/CL/kernels/CLTableLookupKernel.cpp +++ b/src/core/CL/kernels/CLTableLookupKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h" +#include "src/core/CL/kernels/CLTableLookupKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLLut.h" diff --git a/src/core/CL/kernels/CLTableLookupKernel.h b/src/core/CL/kernels/CLTableLookupKernel.h new file mode 100644 index 0000000000..c8d15cbee2 --- /dev/null +++ b/src/core/CL/kernels/CLTableLookupKernel.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
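A minimal sketch of how the mask arguments of CLStridedSliceKernel compose, assuming a 3-D input. The kernel is configured on ITensorInfo objects and, at run time, the caller binds the actual tensors through an ITensorPack before run_op() is invoked; the shapes, mask values and the compile-context retrieval below are illustrative assumptions, not part of this patch.

#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLStridedSliceKernel.h"

using namespace arm_compute;

void strided_slice_info_sketch()
{
    TensorInfo src(TensorShape(8U, 4U, 2U), 1, DataType::F32);
    TensorInfo dst; // destination shape is inferred during configure()

    const Coordinates starts(1, 0, 0);
    const Coordinates ends(7, 4, 2);
    const BiStrides   strides(2, 1, 1);

    // Bit i of begin_mask/end_mask ignores starts[i]/ends[i] and uses the full range instead
    const int32_t begin_mask       = 1 << 1; // ignore starts[1] for dimension 1
    const int32_t end_mask         = 0;
    const int32_t shrink_axis_mask = 0;

    ARM_COMPUTE_ERROR_THROW_ON(CLStridedSliceKernel::validate(&src, &dst, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask));

    CLStridedSliceKernel kernel;
    kernel.configure(CLKernelLibrary::get().get_compile_context(), &src, &dst, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask);
}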
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLTABLELOOKUPKERNEL_H +#define ARM_COMPUTE_CLTABLELOOKUPKERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; +class ICLLut; + +/** Interface for the kernel to perform table lookup calculations. */ +class CLTableLookupKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input, lut and output. + * + * @param[in] input An input tensor. Data types supported: U8, S16. + * @param[in] lut The input LUT. Data types supported: U8, S16. + * @param[out] output The output tensor. Data types supported: U8, S16. + */ + void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output); + /** Initialise the kernel's input, lut and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8, S16. + * @param[in] lut The input LUT. Data types supported: U8, S16. + * @param[out] output The output tensor. Data types supported: U8, S16. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLLut *lut, ICLTensor *output); +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLTABLELOOKUPKERNEL_H */ diff --git a/src/core/CL/kernels/CLThresholdKernel.cpp b/src/core/CL/kernels/CLThresholdKernel.cpp index de81644edd..72c22f043c 100644 --- a/src/core/CL/kernels/CLThresholdKernel.cpp +++ b/src/core/CL/kernels/CLThresholdKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLThresholdKernel.h" +#include "src/core/CL/kernels/CLThresholdKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLThresholdKernel.h b/src/core/CL/kernels/CLThresholdKernel.h new file mode 100644 index 0000000000..511eaed1bf --- /dev/null +++ b/src/core/CL/kernels/CLThresholdKernel.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
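A minimal sketch of the CLTableLookupKernel interface declared above, assuming a U8 input and the runtime CLLut container; the LUT size and the surrounding setup are illustrative assumptions, not part of this patch.

#include "arm_compute/runtime/CL/CLLut.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLTableLookupKernel.h"

using namespace arm_compute;

void table_lookup_sketch(CLTensor &src_u8, CLTensor &dst_u8)
{
    // 256-entry LUT mapping every U8 value to another U8 value (entries filled elsewhere)
    CLLut lut(256, DataType::U8);
    lut.allocator()->allocate();

    CLTableLookupKernel kernel;
    kernel.configure(&src_u8, &lut, &dst_u8);
}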
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLTHRESHOLDKERNEL_H
+#define ARM_COMPUTE_CLTHRESHOLDKERNEL_H
+
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ICLTensor;
+
+/** Interface for the thresholding kernel. */
+class CLThresholdKernel : public ICLSimple2DKernel
+{
+public:
+    /** Initialise the kernel's input, output and threshold parameters.
+     *
+     * @param[in]  input  An input tensor. Data types supported: U8
+     * @param[out] output The output tensor. Data types supported: U8.
+     * @param[in]  info   Threshold descriptor
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info);
+    /** Initialise the kernel's input, output and threshold parameters.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           An input tensor. Data types supported: U8
+     * @param[out] output          The output tensor. Data types supported: U8.
+     * @param[in]  info            Threshold descriptor
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info);
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLTHRESHOLDKERNEL_H */
diff --git a/src/core/CL/kernels/CLTileKernel.cpp b/src/core/CL/kernels/CLTileKernel.cpp
index 43c8953363..c0c3d2e2ee 100644
--- a/src/core/CL/kernels/CLTileKernel.cpp
+++ b/src/core/CL/kernels/CLTileKernel.cpp
@@ -21,7 +21,7 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
-#include "arm_compute/core/CL/kernels/CLTileKernel.h"
+#include "src/core/CL/kernels/CLTileKernel.h"
 #include "arm_compute/core/CL/ICLTensor.h"
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "src/core/helpers/AutoConfiguration.h"
diff --git a/src/core/CL/kernels/CLTileKernel.h b/src/core/CL/kernels/CLTileKernel.h
new file mode 100644
index 0000000000..41752ca90b
--- /dev/null
+++ b/src/core/CL/kernels/CLTileKernel.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
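Similarly, a minimal sketch of the ThresholdKernelInfo-based configure() declared above, assuming a binary threshold on a U8 image and assuming the descriptor aggregates threshold/false_value/true_value/type/upper as in KernelDescriptors.h; the values are illustrative, not part of this patch.

#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLThresholdKernel.h"

using namespace arm_compute;

void threshold_sketch(CLTensor &src_u8, CLTensor &dst_u8)
{
    // Pixels above 127 become 255, the rest become 0 (binary thresholding)
    const ThresholdKernelInfo info(127 /* threshold */, 0 /* false_value */, 255 /* true_value */, ThresholdType::BINARY, 0 /* upper */);

    CLThresholdKernel kernel;
    kernel.configure(&src_u8, &dst_u8, info);
}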
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLTILEKERNEL_H +#define ARM_COMPUTE_CLTILEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform a Tile operation */ +class CLTileKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLTileKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLTileKernel(const CLTileKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLTileKernel &operator=(const CLTileKernel &) = delete; + /** Allow instances of this class to be moved */ + CLTileKernel(CLTileKernel &&) = default; + /** Allow instances of this class to be moved */ + CLTileKernel &operator=(CLTileKernel &&) = default; + /** Default destructor */ + ~CLTileKernel() = default; + /** Set the source, destination of the kernel + * + * @param[in] input Source tensor. Data type supported: All. + * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. + * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported). + * @param[out] output Destination tensor. Same as @p input + * + */ + void configure(const ICLTensor *input, ICLTensor *output, const Multiples &multiples); + /** Set the source, destination of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data type supported: All. + * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. + * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported). + * @param[out] output Destination tensor. Same as @p input + * + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Multiples &multiples); + /** Static function to check if given info will lead to a valid configuration of @ref CLTileKernel + * + * @param[in] input Source tensor info. Data type supported: All. + * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. + * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported). + * @param[in] output Destination tensor info. 
Same as @p input + * + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Multiples &multiples); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLTILEKERNEL_H */ diff --git a/src/core/CL/kernels/CLTransposeKernel.cpp b/src/core/CL/kernels/CLTransposeKernel.cpp index bd910196e9..8d967e901f 100644 --- a/src/core/CL/kernels/CLTransposeKernel.cpp +++ b/src/core/CL/kernels/CLTransposeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLTransposeKernel.h" +#include "src/core/CL/kernels/CLTransposeKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLTransposeKernel.h b/src/core/CL/kernels/CLTransposeKernel.h new file mode 100644 index 0000000000..0c4b7b4aff --- /dev/null +++ b/src/core/CL/kernels/CLTransposeKernel.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLTRANSPOSEKERNEL_H +#define ARM_COMPUTE_CLTRANSPOSEKERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel which transposes the elements of a matrix. + * + * [width, height, batch] -> [height, width, batch] + * + */ +class CLTransposeKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: Same as @p input + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: Same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLTransposeKernel + * + * @param[in] input Input tensor. Data types supported: All. + * @param[in] output Output tensor. 
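A minimal sketch of the Multiples argument consumed by CLTileKernel::configure()/validate() above; the replication factors and the surrounding setup are illustrative assumptions, not part of this patch.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLTileKernel.h"

using namespace arm_compute;

void tile_sketch(CLTensor &src, CLTensor &dst)
{
    // Replicate the tensor twice along x and three times along y (at most 4 entries are allowed)
    const Multiples multiples = { 2, 3 };

    ARM_COMPUTE_ERROR_THROW_ON(CLTileKernel::validate(src.info(), dst.info(), multiples));

    CLTileKernel kernel;
    kernel.configure(&src, &dst, multiples);
}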
Data type supported: Same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLTRANSPOSEKERNEL_H */ diff --git a/src/core/CL/kernels/CLUpsampleLayerKernel.cpp b/src/core/CL/kernels/CLUpsampleLayerKernel.cpp index a4fc10f26a..acb2fbcd04 100644 --- a/src/core/CL/kernels/CLUpsampleLayerKernel.cpp +++ b/src/core/CL/kernels/CLUpsampleLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h" +#include "src/core/CL/kernels/CLUpsampleLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLUpsampleLayerKernel.h b/src/core/CL/kernels/CLUpsampleLayerKernel.h new file mode 100644 index 0000000000..f90ee07bf4 --- /dev/null +++ b/src/core/CL/kernels/CLUpsampleLayerKernel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H +#define ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the UpsampleLayer kernel on OpenCL. */ +class CLUpsampleLayerKernel : public ICLKernel +{ +public: + /** Constructor */ + CLUpsampleLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLUpsampleLayerKernel(const CLUpsampleLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLUpsampleLayerKernel &operator=(const CLUpsampleLayerKernel &) = delete; + /** Default Move Constructor. */ + CLUpsampleLayerKernel(CLUpsampleLayerKernel &&) = default; + /** Default move assignment operator */ + CLUpsampleLayerKernel &operator=(CLUpsampleLayerKernel &&) = default; + /** Default destructor */ + ~CLUpsampleLayerKernel() = default; + + /** Initialise the kernel's input and output. + * + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] info Contains stride information described in @ref Size2D. + * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. 
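A minimal sketch of the validate()/configure() pair of CLTransposeKernel above; the 2-D shapes are illustrative assumptions, not part of this patch.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLTransposeKernel.h"

using namespace arm_compute;

void transpose_sketch()
{
    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 16U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 32U), 1, DataType::F32)); // [width, height] swapped

    ARM_COMPUTE_ERROR_THROW_ON(CLTransposeKernel::validate(src.info(), dst.info()));

    CLTransposeKernel kernel;
    kernel.configure(&src, &dst);
}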
+ */ + void configure(const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] info Contains stride information described in @ref Size2D. + * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy); + /** Static function to check if given info will lead to a valid configuration of @ref CLUpsampleLayerKernel + * + * @param[in] input Source tensor info. Data types supported: All. + * @param[in] output Destination tensor info. Data types supported: same as @p input. + * @param[in] info Contains stride information described in @ref Size2D. + * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info, const InterpolationPolicy upsampling_policy); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + Size2D _info; + DataLayout _data_layout; + unsigned int _num_elems_processed_per_iteration_input_x; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLWarpAffineKernel.cpp b/src/core/CL/kernels/CLWarpAffineKernel.cpp index 95a7c1b875..600c67a528 100644 --- a/src/core/CL/kernels/CLWarpAffineKernel.cpp +++ b/src/core/CL/kernels/CLWarpAffineKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h" +#include "src/core/CL/kernels/CLWarpAffineKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLWarpAffineKernel.h b/src/core/CL/kernels/CLWarpAffineKernel.h new file mode 100644 index 0000000000..c600ee780d --- /dev/null +++ b/src/core/CL/kernels/CLWarpAffineKernel.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLWARPAFFINEKERNEL_H
+#define ARM_COMPUTE_CLWARPAFFINEKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the warp affine kernel.*/
+class CLWarpAffineKernel : public ICLSimple2DKernel
+{
+public:
+    /** Initialize the function's source, destination, interpolation policy and border_mode.
+     *
+     * @param[in]  input  Source tensor. Data types supported: U8.
+     * @param[out] output Destination tensor, Data types supported: U8.
+     * @param[in]  matrix The affine matrix. Must be 2x3 of type float.
+     *                    The matrix argument requires 9 values, the last 3 values are ignored.
+     * @param[in]  policy The interpolation type.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
+    /** Initialize the function's source, destination, interpolation policy and border_mode.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: U8.
+     * @param[out] output          Destination tensor, Data types supported: U8.
+     * @param[in]  matrix          The affine matrix. Must be 2x3 of type float.
+     *                             The matrix argument requires 9 values, the last 3 values are ignored.
+     * @param[in]  policy          The interpolation type.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
+
+    // Inherited methods overridden:
+    BorderSize border_size() const override;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLWARPAFFINEKERNEL_H */
diff --git a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp b/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp
index 2fe1feb485..5f20a0bdd3 100644
--- a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp
+++ b/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp
@@ -21,7 +21,7 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
-#include "arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h"
+#include "src/core/CL/kernels/CLWarpPerspectiveKernel.h"
 #include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/src/core/CL/kernels/CLWarpPerspectiveKernel.h b/src/core/CL/kernels/CLWarpPerspectiveKernel.h
new file mode 100644
index 0000000000..dcbe1c5560
--- /dev/null
+++ b/src/core/CL/kernels/CLWarpPerspectiveKernel.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
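A minimal sketch of the stride/policy arguments of CLUpsampleLayerKernel declared further above, assuming a 2x2 nearest-neighbour upsample; the stride and the surrounding setup are illustrative assumptions, not part of this patch.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLUpsampleLayerKernel.h"

using namespace arm_compute;

void upsample_sketch(CLTensor &src, CLTensor &dst)
{
    const Size2D stride(2, 2); // each input pixel maps to a 2x2 block in the output

    ARM_COMPUTE_ERROR_THROW_ON(CLUpsampleLayerKernel::validate(src.info(), dst.info(), stride, InterpolationPolicy::NEAREST_NEIGHBOR));

    CLUpsampleLayerKernel kernel;
    kernel.configure(&src, &dst, stride, InterpolationPolicy::NEAREST_NEIGHBOR);
}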
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H
+#define ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+/** Interface for the warp perspective kernel.*/
+class CLWarpPerspectiveKernel : public ICLSimple2DKernel
+{
+public:
+    /** Initialize the function's source, destination, interpolation policy and border_mode.
+     *
+     * @param[in]  input  Source tensor. Data types supported: U8.
+     * @param[out] output Destination tensor, Data types supported: U8.
+     * @param[in]  matrix The perspective matrix. Must be 3x3 of type float.
+     * @param[in]  policy The interpolation type.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
+    /** Initialize the function's source, destination, interpolation policy and border_mode.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: U8.
+     * @param[out] output          Destination tensor, Data types supported: U8.
+     * @param[in]  matrix          The perspective matrix. Must be 3x3 of type float.
+     * @param[in]  policy          The interpolation type.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
+
+    // Inherited methods overridden:
+    BorderSize border_size() const override;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H */
diff --git a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
index c06c2d3ec7..559f47ce26 100644
--- a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
+++ b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
@@ -21,7 +21,7 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
-#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
+#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
 #include "arm_compute/core/CL/ICLTensor.h"
 #include "arm_compute/core/Error.h"
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
diff --git a/src/core/CL/kernels/CLWeightsReshapeKernel.h b/src/core/CL/kernels/CLWeightsReshapeKernel.h
new file mode 100644
index 0000000000..402a60472b
--- /dev/null
+++ b/src/core/CL/kernels/CLWeightsReshapeKernel.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
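A minimal sketch of the 9-element matrix argument shared by the two warp kernels above: for the affine case only the first six values (the 2x3 transform) are read, while the perspective case uses all nine (a 3x3 matrix). The identity transforms below, and the assumption that the coefficients follow the OpenVX-style layout, are illustrative and not part of this patch.

#include <array>
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLWarpAffineKernel.h"
#include "src/core/CL/kernels/CLWarpPerspectiveKernel.h"

using namespace arm_compute;

void warp_sketch(CLTensor &src_u8, CLTensor &dst_u8)
{
    // Identity affine transform: 2x3 coefficients in the first 6 entries, last 3 ignored
    const std::array<float, 9> affine = { 1.f, 0.f, 0.f, 1.f, 0.f, 0.f, 0.f, 0.f, 0.f };
    CLWarpAffineKernel warp_affine;
    warp_affine.configure(&src_u8, &dst_u8, affine, InterpolationPolicy::NEAREST_NEIGHBOR);

    // Identity perspective transform: full 3x3 matrix
    const std::array<float, 9> perspective = { 1.f, 0.f, 0.f, 0.f, 1.f, 0.f, 0.f, 0.f, 1.f };
    CLWarpPerspectiveKernel warp_perspective;
    warp_perspective.configure(&src_u8, &dst_u8, perspective, InterpolationPolicy::BILINEAR);
}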
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H +#define ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +/** OpenCL kernel to perform reshaping on the weights used by convolution and locally connected layer + * + * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels. + * In combination with the @ref CLIm2ColKernel can transform a convolution to a matrix multiplication. + * + * For example assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have: + * @f[ + * \left( \begin{array}{ccc} + * a000 & a001 & a002 \\ + * a010 & a011 & a012 \\ + * a020 & a021 & a022 \\ + * \end{array} \right) + * \left( \begin{array}{ccc} + * a100 & a101 & a102 \\ + * a110 & a111 & a112 \\ + * a120 & a121 & a122 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccc} + * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\ + * \end{array} \right) + * @f] + */ +class CLWeightsReshapeKernel : public ICLKernel +{ +public: + /** Constructor.*/ + CLWeightsReshapeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWeightsReshapeKernel(const CLWeightsReshapeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWeightsReshapeKernel &operator=(const CLWeightsReshapeKernel &) = delete; + /** Allow instances of this class to be moved */ + CLWeightsReshapeKernel(CLWeightsReshapeKernel &&) = default; + /** Allow instances of this class to be moved */ + CLWeightsReshapeKernel &operator=(CLWeightsReshapeKernel &&) = default; + /** Default destructor */ + ~CLWeightsReshapeKernel() = default; + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, + * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All + * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with + * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr. 
+ * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. + * @param[out] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise. + * Data types supported: Same as @p input + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout + * Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it. + */ + void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1); + /** Set the input and output of the kernel. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, + * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All + * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with + * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr. + * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. + * @param[out] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise. + * Data types supported: Same as @p input + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout + * Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1); + /** Static function to check if given info will lead to a valid configuration of @ref CLWeightsReshapeKernel + * + * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, + * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All + * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with + * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr. + * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. + * @param[in] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise. + * Data types supported: Same as @p input + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout + * Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups = 1); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_biases; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H */ \ No newline at end of file diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp index a7a3463f59..d6697ba46b 100644 --- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" +#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h new file mode 100644 index 0000000000..2af89e12eb --- /dev/null +++ b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H +#define ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +/** Interface for the width concatenate kernel of 2 tensors. + * The input1 and input2 tensors will be concatenated into the output tensor. 
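A minimal sketch of the CLWeightsReshapeKernel configure()/validate() interface above for ungrouped FP32 weights; the 4-D weight shape and the absence of biases are illustrative assumptions, not part of this patch.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"

using namespace arm_compute;

// reshaped receives the linearized kernels (its shape is derived during configure if left empty)
void weights_reshape_sketch(CLTensor &reshaped)
{
    // Weights laid out as [kernel_x, kernel_y, IFM, OFM] = [3, 3, 2, 4]
    CLTensor weights;
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32));

    ARM_COMPUTE_ERROR_THROW_ON(CLWeightsReshapeKernel::validate(weights.info(), nullptr /* no biases */, reshaped.info(), 1 /* num_groups */));

    CLWeightsReshapeKernel kernel;
    kernel.configure(&weights, nullptr, &reshaped, 1);
}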
+ */ +class CLWidthConcatenate2TensorsKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLWidthConcatenate2TensorsKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWidthConcatenate2TensorsKernel(const CLWidthConcatenate2TensorsKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWidthConcatenate2TensorsKernel &operator=(const CLWidthConcatenate2TensorsKernel &) = delete; + /** Allow instances of this class to be moved */ + CLWidthConcatenate2TensorsKernel(CLWidthConcatenate2TensorsKernel &&) = default; + /** Allow instances of this class to be moved */ + CLWidthConcatenate2TensorsKernel &operator=(CLWidthConcatenate2TensorsKernel &&) = default; + /** Default destructor */ + ~CLWidthConcatenate2TensorsKernel() = default; + /** Initialise the kernel's input1s and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 First input tensor. Data types supported: All. + * @param[in] input2 Second input tensor. Data types supported: same as @p input1 + * @param[out] output Output tensor. Data types supported: Same as @p input1. + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate2TensorsKernel + * + * @param[in] input1 First tensor info. Data types supported: All. + * @param[in] input2 Second tensor info. Data types supported: same as @p input1 + * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H */ diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp index 1c8fef2db3..7ecdd30224 100644 --- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" +#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h new file mode 100644 index 0000000000..0caf87114d --- /dev/null +++ b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
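A minimal sketch of the ITensorInfo-based configure() of CLWidthConcatenate2TensorsKernel above; the shapes and the compile-context retrieval are illustrative assumptions, not part of this patch (at run time the caller binds the actual tensors through an ITensorPack when invoking run_op). The 4-tensor and single-input width-concatenate kernels that follow use the same pattern, the latter adding an explicit width_offset per input.

#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"

using namespace arm_compute;

void width_concat2_sketch()
{
    TensorInfo in0(TensorShape(8U, 4U), 1, DataType::F32);
    TensorInfo in1(TensorShape(6U, 4U), 1, DataType::F32);
    TensorInfo out(TensorShape(14U, 4U), 1, DataType::F32); // widths add up, remaining dimensions match

    ARM_COMPUTE_ERROR_THROW_ON(CLWidthConcatenate2TensorsKernel::validate(&in0, &in1, &out));

    CLWidthConcatenate2TensorsKernel kernel;
    kernel.configure(CLKernelLibrary::get().get_compile_context(), &in0, &in1, &out);
}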
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H +#define ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +/** Interface for the width concatenate kernel of 4 tensors. + * All input tensors will be concatenated into the output tensor. + */ +class CLWidthConcatenate4TensorsKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLWidthConcatenate4TensorsKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWidthConcatenate4TensorsKernel(const CLWidthConcatenate4TensorsKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWidthConcatenate4TensorsKernel &operator=(const CLWidthConcatenate4TensorsKernel &) = delete; + /** Allow instances of this class to be moved */ + CLWidthConcatenate4TensorsKernel(CLWidthConcatenate4TensorsKernel &&) = default; + /** Allow instances of this class to be moved */ + CLWidthConcatenate4TensorsKernel &operator=(CLWidthConcatenate4TensorsKernel &&) = default; + /** Default destructor */ + ~CLWidthConcatenate4TensorsKernel() = default; + /** Initialise the kernel's input1s and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 First input tensor. Data types supported: All. + * @param[in] input2 Second input tensor. Data types supported: same as @p input1 + * @param[in] input3 Third input tensor. Data types supported: same as @p input1 + * @param[in] input4 Fourth input tensor. Data types supported: same as @p input1 + * @param[out] output Output tensor. Data types supported: Same as @p input1. + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *input3, ITensorInfo *input4, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate4TensorsKernel + * + * @param[in] input1 First tensor info. Data types supported: All. + * @param[in] input2 Second tensor info. Data types supported: same as @p input1 + * @param[in] input3 Third tensor info. Data types supported: same as @p input1 + * @param[in] input4 Fourth tensor info. Data types supported: same as @p input1 + * @param[in] output Output tensor info. 
Data types supported: Same as @p input1. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H */ diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp index a9a601dc8e..30d0a481bd 100644 --- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h" +#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.h b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.h new file mode 100644 index 0000000000..09c3f4455d --- /dev/null +++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H +#define ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +/** Interface for the width concatenate kernel. + * The input tensor will be concatenated into the output tensor. 
+ */ +class CLWidthConcatenateLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLWidthConcatenateLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWidthConcatenateLayerKernel(const CLWidthConcatenateLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWidthConcatenateLayerKernel &operator=(const CLWidthConcatenateLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLWidthConcatenateLayerKernel(CLWidthConcatenateLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLWidthConcatenateLayerKernel &operator=(CLWidthConcatenateLayerKernel &&) = default; + /** Default destructor */ + ~CLWidthConcatenateLayerKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[in] width_offset The offset on the X axis. + * @param[in,out] output Output tensor. Data types supported: Same as @p input. + * + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int width_offset, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenateLayerKernel + * + * @param[in] input Input tensor info. Data types supported: All. + * @param[in] width_offset The offset on the X axis. + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp index e2f9ca5726..bd45ddb65f 100644 --- a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp +++ b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h" +#include "src/core/CL/kernels/CLWinogradFilterTransformKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" @@ -83,7 +83,7 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen const unsigned int num_elems_processed_per_iteration_y = input->dimension(1); const unsigned int num_elems_read_per_iteration_z = input->data_layout() == DataLayout::NCHW ? 
1 : input->dimension(2); - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y, num_elems_read_per_iteration_z)); + Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y, num_elems_read_per_iteration_z)); Window win_collapsed = win.collapse(win, Window::DimZ); return std::make_pair(Status{}, win_collapsed); } diff --git a/src/core/CL/kernels/CLWinogradFilterTransformKernel.h b/src/core/CL/kernels/CLWinogradFilterTransformKernel.h new file mode 100644 index 0000000000..d22fedebcd --- /dev/null +++ b/src/core/CL/kernels/CLWinogradFilterTransformKernel.h @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H +#define ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the Winograd filter transform kernel. */ +class CLWinogradFilterTransformKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLWinogradFilterTransformKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWinogradFilterTransformKernel(const CLWinogradFilterTransformKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWinogradFilterTransformKernel &operator=(const CLWinogradFilterTransformKernel &) = delete; + /** Allow instances of this class to be moved */ + CLWinogradFilterTransformKernel(CLWinogradFilterTransformKernel &&) = default; + /** Allow instances of this class to be moved */ + CLWinogradFilterTransformKernel &operator=(CLWinogradFilterTransformKernel &&) = default; + /** Default destructor */ + ~CLWinogradFilterTransformKernel() = default; + /** Set the input and output tensor. 
+     *
+     * @note Winograd filter transform supports the following configurations for NCHW data layout
+     *       F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
+     *                                   F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+     *                                   F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+     *
+     * @note Winograd filter transform supports the following configurations for NHWC data layout
+     *       F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+     *                                   F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+     *
+     * Strides: only unit strides
+     *
+     * @param[in]  input         Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32.
+     * @param[out] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input
+     * @param[in]  winograd_info Contains Winograd's information described in @ref WinogradInfo
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
+    /** Set the input and output tensor.
+     *
+     * @note Winograd filter transform supports the following configurations for NCHW data layout
+     *       F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
+     *                                   F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+     *                                   F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+     *
+     * @note Winograd filter transform supports the following configurations for NHWC data layout
+     *       F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+     *                                   F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+     *
+     * Strides: only unit strides
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32.
+     * @param[out] output          The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input
+     * @param[in]  winograd_info   Contains Winograd's information described in @ref WinogradInfo
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradFilterTransformKernel
+     *
+     * @note Winograd filter transform supports the following configurations for NCHW data layout
+     *       F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
+     *                                   F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+     *                                   F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+     *
+     * @note Winograd filter transform supports the following configurations for NHWC data layout
+     *       F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+     *                                   F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+     *
+     * Strides: only unit strides
+     *
+     * @param[in]  input         Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32.
+     * @param[out] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape.
Data types supported: Same as @p input + * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H */ diff --git a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp index 15c239e849..6f695c93db 100644 --- a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp +++ b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h" +#include "src/core/CL/kernels/CLWinogradInputTransformKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLWinogradInputTransformKernel.h b/src/core/CL/kernels/CLWinogradInputTransformKernel.h new file mode 100644 index 0000000000..25301877e6 --- /dev/null +++ b/src/core/CL/kernels/CLWinogradInputTransformKernel.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H +#define ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform Winograd input transform.*/ +class CLWinogradInputTransformKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLWinogradInputTransformKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWinogradInputTransformKernel(const CLWinogradInputTransformKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWinogradInputTransformKernel &operator=(const CLWinogradInputTransformKernel &) = delete; + /** Allow instances of this class to be moved */ + CLWinogradInputTransformKernel(CLWinogradInputTransformKernel &&) = default; + /** Allow instances of this class to be moved */ + CLWinogradInputTransformKernel &operator=(CLWinogradInputTransformKernel &&) = default; + /** Set the input and output of the kernel. + * + * @note Winograd input transform supports the following configurations for NCWH data layout + * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), + * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * @note Winograd input transform supports the following configurations for NHWC data layout + * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * Strides: only unit strides + * + * @param[in] input The input tensor to transform. Data types supported: F16/F32 + * @param[in] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input + * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo. + */ + void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info); + /** Set the input and output of the kernel. + * + * @note Winograd input transform supports the following configurations for NCWH data layout + * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), + * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * @note Winograd input transform supports the following configurations for NHWC data layout + * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * Strides: only unit strides + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to transform. Data types supported: F16/F32 + * @param[in] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input + * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradInputTransformKernel + * + * @note Winograd input transform supports the following configurations for NCWH data layout + * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), + * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * @note Winograd input transform supports the following configurations for NHWC data layout + * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * Strides: only unit strides + * + * @param[in] input The input tensor to transform. Data types supported: F16/F32 + * @param[in] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input + * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + using WinogradKey = std::pair, std::pair>; + + BorderSize _border_size; + const ICLTensor *_input; + ICLTensor *_output; + DataLayout _data_layout; + int _num_tiles_x; + int _num_tiles_y; + unsigned int _step_z; +}; +} // arm_compute +#endif /*ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H */ diff --git a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp index 89a5176756..2018559f60 100644 --- a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp +++ b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h" +#include "src/core/CL/kernels/CLWinogradOutputTransformKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLWinogradOutputTransformKernel.h b/src/core/CL/kernels/CLWinogradOutputTransformKernel.h new file mode 100644 index 0000000000..632a5629d9 --- /dev/null +++ b/src/core/CL/kernels/CLWinogradOutputTransformKernel.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H +#define ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the Winograd output transform kernel. */ +class CLWinogradOutputTransformKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLWinogradOutputTransformKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWinogradOutputTransformKernel(const CLWinogradOutputTransformKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWinogradOutputTransformKernel &operator=(const CLWinogradOutputTransformKernel &) = delete; + /** Allow instances of this class to be moved */ + CLWinogradOutputTransformKernel(CLWinogradOutputTransformKernel &&) = default; + /** Allow instances of this class to be moved */ + CLWinogradOutputTransformKernel &operator=(CLWinogradOutputTransformKernel &&) = default; + /** Default destructor */ + ~CLWinogradOutputTransformKernel() = default; + /** Set the input and output tensor. + * + * @note Winograd output transform supports the following configurations for NCWH data layout + * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), + * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * @note Winograd output transform supports the following configurations for NHWC data layout + * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * Strides: only unit strides + * + * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32. + * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input + * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input + * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Set the input and output tensor. + * + * @note Winograd output transform supports the following configurations for NCWH data layout + * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), + * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * @note Winograd output transform supports the following configurations for NHWC data layout + * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * Strides: only unit strides + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32. + * @param[in] bias Biases tensor. Shared biases supported. 
Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input + * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input + * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradOutputTransformKernel + * + * @note Winograd output transform supports the following configurations for NCWH data layout + * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), + * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * @note Winograd output transform supports the following configurations for NHWC data layout + * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * Strides: only unit strides + * + * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32. + * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input + * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input + * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo + * @param[in] act_info (Optional) Activation layer information in case of a fused activation @ref ActivationLayerInfo. Only RELU, BOUNDED_RELU, LU_BOUNDED_RELU, LEAKY_RELU and SOFT_RELU supported. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + using WinogradKey = std::pair, std::pair>; + + const ICLTensor *_input; + const ICLTensor *_bias; + ICLTensor *_output; + bool _is_nhwc; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H */ diff --git a/src/core/CL/kernels/CLYOLOLayerKernel.cpp b/src/core/CL/kernels/CLYOLOLayerKernel.cpp index 0c7588d740..e12d1e7a65 100644 --- a/src/core/CL/kernels/CLYOLOLayerKernel.cpp +++ b/src/core/CL/kernels/CLYOLOLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLYOLOLayerKernel.h" +#include "src/core/CL/kernels/CLYOLOLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLHelpers.h" diff --git a/src/core/CL/kernels/CLYOLOLayerKernel.h b/src/core/CL/kernels/CLYOLOLayerKernel.h new file mode 100644 index 0000000000..5b1d56e9e5 --- /dev/null +++ b/src/core/CL/kernels/CLYOLOLayerKernel.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLYOLOLAYERKERNEL_H +#define ARM_COMPUTE_CLYOLOLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the YOLO layer kernel that performs partial activation. + * For each box, activate only: + * - x and y position (channel 0 and 1 of each box) + * - objectiveness (channel 4 of each box) + * - classes (channel 5 to (classes - 5) of each box) + */ +class CLYOLOLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLYOLOLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLYOLOLayerKernel(const CLYOLOLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLYOLOLayerKernel &operator=(const CLYOLOLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLYOLOLayerKernel(CLYOLOLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLYOLOLayerKernel &operator=(CLYOLOLayerKernel &&) = default; + /** Default destructor */ + ~CLYOLOLayerKernel() = default; + /** Set the input and output tensor. + * + * @note If the output tensor is a nullptr, the activation function will be performed in-place + * + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] act_info Activation layer information. + * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) + */ + void configure(ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes); + /** Set the input and output tensor. + * + * @note If the output tensor is a nullptr, the activation function will be performed in-place + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] act_info Activation layer information. 
+ * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes); + /** Static function to check if given info will lead to a valid configuration of @ref CLYOLOLayerKernel + * + * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result + * of the activation function. Data types supported: F16/F32. + * @param[in] output Destination tensor info. Data type supported: same as @p input + * @param[in] act_info Activation layer information. + * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_input; + ICLTensor *_output; + bool _run_in_place; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLYOLOLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h b/src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h new file mode 100644 index 0000000000..4c92ae417f --- /dev/null +++ b/src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H +#define ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor. 
+ */ +class ICLDepthwiseConvolutionLayer3x3Kernel : public ICLKernel +{ +public: + /** Default constructor */ + ICLDepthwiseConvolutionLayer3x3Kernel() + : _border_size(0), _input(), _output(), _weights(), _biases(), _conv_stride_y(1), _output_multipliers(), _output_shifts(), _is_quantized(false) + { + } + /** Prevent instances of this class from being copied (As this class contains pointers) */ + ICLDepthwiseConvolutionLayer3x3Kernel(const ICLDepthwiseConvolutionLayer3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + ICLDepthwiseConvolutionLayer3x3Kernel &operator=(const ICLDepthwiseConvolutionLayer3x3Kernel &) = delete; + /** Default Move Constructor. */ + ICLDepthwiseConvolutionLayer3x3Kernel(ICLDepthwiseConvolutionLayer3x3Kernel &&) = default; + /** Default move assignment operator */ + ICLDepthwiseConvolutionLayer3x3Kernel &operator=(ICLDepthwiseConvolutionLayer3x3Kernel &&) = default; + /** Initialize the function's source, destination, conv and border_size. + * + * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. + * Data type supported: Same as @p input, QASYMM8/QSYMM8_PER_CHANNEL when input is QASYMM8. + * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for QASYMM8. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + */ + virtual void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), + const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) = 0; + /** Initialize the function's source, destination, conv and border_size. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. + * Data type supported: Same as @p input, QASYMM8/QSYMM8_PER_CHANNEL when input is QASYMM8. + * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8. + * @param[out] output Destination tensor. Data type supported: Same as @p input. 
+ * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for QASYMM8. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + */ + virtual void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), + const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) = 0; + +protected: + BorderSize _border_size; + const ICLTensor *_input; + ICLTensor *_output; + const ICLTensor *_weights; + const ICLTensor *_biases; + unsigned int _conv_stride_y; + const ICLTensor *_output_multipliers; + const ICLTensor *_output_shifts; + bool _is_quantized; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H */ -- cgit v1.2.1
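
The three Winograd transform kernels declared above (filter, input and output transform) share the same configure()/validate() pattern and are meant to be chained around a batched GEMM. Below is a minimal sketch of that wiring for an F(4x4, 3x3) NHWC configuration with unit strides; the tensor names, the shape set-up and the elided GEMM step are assumptions for illustration, and in practice the runtime function CLWinogradConvolutionLayer drives these kernels rather than user code.

// Sketch only: shapes, tensor names and the GEMM step are assumptions for illustration.
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLWinogradFilterTransformKernel.h"
#include "src/core/CL/kernels/CLWinogradInputTransformKernel.h"
#include "src/core/CL/kernels/CLWinogradOutputTransformKernel.h"

using namespace arm_compute;

void winograd_f4x4_3x3_sketch(const CLTensor &src, const CLTensor &weights, CLTensor &dst)
{
    // F(4x4, 3x3) on NHWC with unit strides: one of the configurations listed in the headers above.
    const WinogradInfo winograd_info(Size2D(4U, 4U) /* output tile */,
                                     Size2D(3U, 3U) /* kernel size */,
                                     Size2D(src.info()->dimension(1), src.info()->dimension(2)) /* input W x H */,
                                     PadStrideInfo(1, 1, 1, 1),
                                     DataLayout::NHWC);

    // Intermediate tensors sized with the shape-calculator helpers referenced in the doxygen above.
    CLTensor input_transformed, filter_transformed, batched_mm_output;
    input_transformed.allocator()->init(TensorInfo(misc::shape_calculator::compute_winograd_input_transform_shape(*src.info(), winograd_info), 1, src.info()->data_type()));
    filter_transformed.allocator()->init(TensorInfo(misc::shape_calculator::compute_winograd_filter_transform_shape(*weights.info(), winograd_info), 1, weights.info()->data_type()));

    // Validate before configuring; each kernel reports unsupported shapes/layouts as an error Status.
    ARM_COMPUTE_ERROR_THROW_ON(CLWinogradInputTransformKernel::validate(src.info(), input_transformed.info(), winograd_info));
    ARM_COMPUTE_ERROR_THROW_ON(CLWinogradFilterTransformKernel::validate(weights.info(), filter_transformed.info(), winograd_info));

    CLWinogradInputTransformKernel  input_transform;
    CLWinogradFilterTransformKernel filter_transform;
    CLWinogradOutputTransformKernel output_transform;

    input_transform.configure(&src, &input_transformed, winograd_info);
    filter_transform.configure(&weights, &filter_transformed, winograd_info);
    // ... a batched GEMM multiplies input_transformed by filter_transformed into batched_mm_output ...
    output_transform.configure(&batched_mm_output, nullptr /* no bias */, &dst, winograd_info, ActivationLayerInfo());
}

Each configured kernel is then dispatched through the scheduler (for example CLScheduler::get().enqueue(kernel)); calling the static validate() first surfaces unsupported tile/kernel-size combinations as a Status instead of a failed configure().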
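
CLYOLOLayerKernel documents an in-place path when the output pointer is null. A minimal sketch follows, assuming an NCHW detection tensor with num_anchors * (5 + num_classes) channels and a logistic activation; the shape and class count are illustrative only.

// Sketch only: tensor shape, class count and activation choice are assumptions for illustration.
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLYOLOLayerKernel.h"

using namespace arm_compute;

void yolo_partial_activation_sketch(CLTensor &detections)
{
    // Assumed NCHW shape [grid_w, grid_h, num_anchors * (5 + num_classes), batches],
    // e.g. 80 classes -> 85 channels per anchor box.
    const int32_t             num_classes = 80;
    const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::LOGISTIC);

    CLYOLOLayerKernel yolo;
    // A nullptr output selects the in-place path documented in the header.
    ARM_COMPUTE_ERROR_THROW_ON(CLYOLOLayerKernel::validate(detections.info(), nullptr, act_info, num_classes));
    yolo.configure(&detections, nullptr, act_info, num_classes);
    // The configured kernel is enqueued by the owning runtime function (CLYOLOLayer),
    // which ultimately calls run(window, queue) on the scheduler's queue.
}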
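
ICLDepthwiseConvolutionLayer3x3Kernel is the pure-virtual base shared by the NCHW and NHWC 3x3 depthwise kernels moved earlier in this patch. A small sketch of how a caller might pick a concrete implementation by data layout; the factory function name is hypothetical.

// Sketch only: make_dwc3x3_kernel is a hypothetical helper, not part of the library.
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
#include "src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"

#include <memory>

using namespace arm_compute;

std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> make_dwc3x3_kernel(DataLayout layout)
{
    // Both concrete kernels implement the configure() contract declared in the interface,
    // so the caller only needs to choose based on the tensor data layout.
    if(layout == DataLayout::NHWC)
    {
        return std::make_unique<CLDepthwiseConvolutionLayer3x3NHWCKernel>();
    }
    return std::make_unique<CLDepthwiseConvolutionLayer3x3NCHWKernel>();
}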