From bef7fa27b0d231a8649952f60808132d109b6345 Mon Sep 17 00:00:00 2001
From: Sang-Hoon Park
Date: Wed, 21 Oct 2020 15:58:54 +0100
Subject: COMPMID-3639: (3RDPARTY_UPDATE) Move CL kernels to src

Change-Id: I10d27db788e5086adae1841e3e2441cd9b76ef84
Signed-off-by: Sang-Hoon Park
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4310
Reviewed-by: Georgios Pinitas
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
---
 3rdparty | 2 +- arm_compute/core/CL/CLKernels.h | 163 --------- arm_compute/core/CL/CLTypes.h | 35 ++ arm_compute/core/CL/ICLKernel.h | 404 --------------------- arm_compute/core/CL/ICLSimple2DKernel.h | 41 --- arm_compute/core/CL/ICLSimple3DKernel.h | 43 --- arm_compute/core/CL/ICLSimpleKernel.h | 66 ---- .../core/CL/kernels/CLAbsoluteDifferenceKernel.h | 79 ---- arm_compute/core/CL/kernels/CLAccumulateKernel.h | 114 ------ .../core/CL/kernels/CLActivationLayerKernel.h | 77 ---- .../core/CL/kernels/CLArgMinMaxLayerKernel.h | 106 ------ .../CL/kernels/CLBatchConcatenateLayerKernel.h | 82 ----- .../CL/kernels/CLBatchNormalizationLayerKernel.h | 120 ------ .../core/CL/kernels/CLBatchToSpaceLayerKernel.h | 111 ------ arm_compute/core/CL/kernels/CLBitwiseAndKernel.h | 76 ---- arm_compute/core/CL/kernels/CLBitwiseNotKernel.h | 56 --- arm_compute/core/CL/kernels/CLBitwiseOrKernel.h | 76 ---- arm_compute/core/CL/kernels/CLBitwiseXorKernel.h | 76 ---- .../core/CL/kernels/CLBoundingBoxTransformKernel.h | 99 ----- arm_compute/core/CL/kernels/CLBox3x3Kernel.h | 59 --- arm_compute/core/CL/kernels/CLCannyEdgeKernel.h | 185 ---------- .../core/CL/kernels/CLChannelCombineKernel.h | 102 ------ .../core/CL/kernels/CLChannelExtractKernel.h | 95 ----- .../core/CL/kernels/CLChannelShuffleLayerKernel.h | 82 ----- arm_compute/core/CL/kernels/CLCol2ImKernel.h | 106 ------ arm_compute/core/CL/kernels/CLColorConvertKernel.h | 121 ------ arm_compute/core/CL/kernels/CLComparisonKernel.h | 89 ----- .../kernels/CLConvertFullyConnectedWeightsKernel.h | 90 ----- arm_compute/core/CL/kernels/CLConvolutionKernel.h | 224 ------------ arm_compute/core/CL/kernels/CLCopyKernel.h | 83 ----- arm_compute/core/CL/kernels/CLCropKernel.h | 103 ------ .../kernels/CLDeconvolutionLayerUpsampleKernel.h | 86 ----- .../kernels/CLDeconvolutionReshapeOutputKernel.h | 106 ------ .../CL/kernels/CLDepthConcatenateLayerKernel.h | 80 ---- .../core/CL/kernels/CLDepthConvertLayerKernel.h | 91 ----- .../core/CL/kernels/CLDepthToSpaceLayerKernel.h | 84 ----- .../CLDepthwiseConvolutionLayer3x3NCHWKernel.h | 114 ------ .../CLDepthwiseConvolutionLayer3x3NHWCKernel.h | 113 ------ .../CLDepthwiseConvolutionLayerNativeKernel.h | 131 ------- ...DepthwiseConvolutionLayerReshapeWeightsKernel.h | 85 ----- .../core/CL/kernels/CLDequantizationLayerKernel.h | 79 ---- arm_compute/core/CL/kernels/CLDerivativeKernel.h | 83 ----- arm_compute/core/CL/kernels/CLDilateKernel.h | 59 --- .../CL/kernels/CLDirectConvolutionLayerKernel.h | 125 ------- .../CL/kernels/CLElementWiseUnaryLayerKernel.h | 66 ---- .../core/CL/kernels/CLElementwiseOperationKernel.h | 212 ----------- arm_compute/core/CL/kernels/CLErodeKernel.h | 59 --- .../core/CL/kernels/CLFFTDigitReverseKernel.h | 89 ----- .../core/CL/kernels/CLFFTRadixStageKernel.h | 97 ----- arm_compute/core/CL/kernels/CLFFTScaleKernel.h | 86 ----- arm_compute/core/CL/kernels/CLFastCornersKernel.h | 133 ------- arm_compute/core/CL/kernels/CLFillBorderKernel.h | 96 ----- arm_compute/core/CL/kernels/CLFlattenLayerKernel.h | 83 ----- arm_compute/core/CL/kernels/CLFloorKernel.h | 81 ----- 
.../CL/kernels/CLFuseBatchNormalizationKernel.h | 126 ------- .../kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h | 108 ------ .../CLGEMMLowpMatrixMultiplyReshapedKernel.h | 123 ------- ...CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h | 155 -------- .../kernels/CLGEMMLowpOffsetContributionKernel.h | 116 ------ ...CLGEMMLowpOffsetContributionOutputStageKernel.h | 135 ------- ...MLowpQuantizeDownInt32ScaleByFixedPointKernel.h | 89 ----- ...CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h | 101 ------ .../CLGEMMLowpQuantizeDownInt32ScaleKernel.h | 102 ------ .../core/CL/kernels/CLGEMMLowpReductionKernel.h | 176 --------- .../core/CL/kernels/CLGEMMMatrixMultiplyKernel.h | 122 ------- .../CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h | 127 ------- .../kernels/CLGEMMMatrixMultiplyReshapedKernel.h | 188 ---------- .../CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h | 168 --------- .../CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h | 84 ----- .../core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h | 105 ------ .../core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h | 133 ------- arm_compute/core/CL/kernels/CLGatherKernel.h | 89 ----- arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h | 59 --- arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h | 83 ----- .../core/CL/kernels/CLGaussianPyramidKernel.h | 111 ------ .../CL/kernels/CLGenerateProposalsLayerKernel.h | 85 ----- .../core/CL/kernels/CLHOGDescriptorKernel.h | 122 ------- arm_compute/core/CL/kernels/CLHOGDetectorKernel.h | 96 ----- .../core/CL/kernels/CLHarrisCornersKernel.h | 100 ----- .../CL/kernels/CLHeightConcatenateLayerKernel.h | 77 ---- arm_compute/core/CL/kernels/CLHistogramKernel.h | 111 ------ arm_compute/core/CL/kernels/CLIm2ColKernel.h | 136 ------- .../kernels/CLInstanceNormalizationLayerKernel.h | 90 ----- .../core/CL/kernels/CLIntegralImageKernel.h | 86 ----- .../core/CL/kernels/CLL2NormalizeLayerKernel.h | 100 ----- arm_compute/core/CL/kernels/CLLKTrackerKernel.h | 240 ------------ .../CLLocallyConnectedMatrixMultiplyKernel.h | 85 ----- .../core/CL/kernels/CLMagnitudePhaseKernel.h | 90 ----- .../core/CL/kernels/CLMaxUnpoolingLayerKernel.h | 86 ----- arm_compute/core/CL/kernels/CLMeanStdDevKernel.h | 98 ----- .../CL/kernels/CLMeanStdDevNormalizationKernel.h | 90 ----- arm_compute/core/CL/kernels/CLMedian3x3Kernel.h | 59 --- arm_compute/core/CL/kernels/CLMemsetKernel.h | 85 ----- arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h | 87 ----- .../core/CL/kernels/CLMinMaxLocationKernel.h | 124 ------- .../core/CL/kernels/CLNonLinearFilterKernel.h | 77 ---- .../CL/kernels/CLNonMaximaSuppression3x3Kernel.h | 60 --- .../core/CL/kernels/CLNormalizationLayerKernel.h | 90 ----- .../CL/kernels/CLNormalizePlanarYUVLayerKernel.h | 94 ----- arm_compute/core/CL/kernels/CLPadLayerKernel.h | 96 ----- arm_compute/core/CL/kernels/CLPermuteKernel.h | 90 ----- .../CL/kernels/CLPixelWiseMultiplicationKernel.h | 196 ---------- arm_compute/core/CL/kernels/CLPoolingLayerKernel.h | 96 ----- .../core/CL/kernels/CLPriorBoxLayerKernel.h | 99 ----- .../CL/kernels/CLQLSTMLayerNormalizationKernel.h | 88 ----- .../core/CL/kernels/CLQuantizationLayerKernel.h | 86 ----- .../core/CL/kernels/CLROIAlignLayerKernel.h | 110 ------ .../core/CL/kernels/CLROIPoolingLayerKernel.h | 92 ----- arm_compute/core/CL/kernels/CLRangeKernel.h | 92 ----- .../core/CL/kernels/CLReductionOperationKernel.h | 99 ----- arm_compute/core/CL/kernels/CLRemapKernel.h | 81 ----- arm_compute/core/CL/kernels/CLReorgLayerKernel.h | 90 ----- arm_compute/core/CL/kernels/CLReshapeLayerKernel.h | 59 --- 
arm_compute/core/CL/kernels/CLReverseKernel.h | 84 ----- arm_compute/core/CL/kernels/CLScaleKernel.h | 93 ----- arm_compute/core/CL/kernels/CLScharr3x3Kernel.h | 97 ----- arm_compute/core/CL/kernels/CLSelectKernel.h | 94 ----- arm_compute/core/CL/kernels/CLSobel3x3Kernel.h | 83 ----- arm_compute/core/CL/kernels/CLSobel5x5Kernel.h | 139 ------- arm_compute/core/CL/kernels/CLSobel7x7Kernel.h | 139 ------- arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h | 158 -------- .../core/CL/kernels/CLSpaceToBatchLayerKernel.h | 121 ------ .../core/CL/kernels/CLSpaceToDepthLayerKernel.h | 84 ----- arm_compute/core/CL/kernels/CLStackLayerKernel.h | 101 ------ arm_compute/core/CL/kernels/CLStridedSliceKernel.h | 79 ---- arm_compute/core/CL/kernels/CLTableLookupKernel.h | 55 --- arm_compute/core/CL/kernels/CLThresholdKernel.h | 57 --- arm_compute/core/CL/kernels/CLTileKernel.h | 88 ----- arm_compute/core/CL/kernels/CLTransposeKernel.h | 64 ---- .../core/CL/kernels/CLUpsampleLayerKernel.h | 89 ----- arm_compute/core/CL/kernels/CLWarpAffineKernel.h | 62 ---- .../core/CL/kernels/CLWarpPerspectiveKernel.h | 59 --- .../core/CL/kernels/CLWeightsReshapeKernel.h | 121 ------ .../CL/kernels/CLWidthConcatenate2TensorsKernel.h | 73 ---- .../CL/kernels/CLWidthConcatenate4TensorsKernel.h | 77 ---- .../CL/kernels/CLWidthConcatenateLayerKernel.h | 74 ---- .../CL/kernels/CLWinogradFilterTransformKernel.h | 115 ------ .../CL/kernels/CLWinogradInputTransformKernel.h | 121 ------ .../CL/kernels/CLWinogradOutputTransformKernel.h | 127 ------- arm_compute/core/CL/kernels/CLYOLOLayerKernel.h | 98 ----- .../ICLDepthwiseConvolutionLayer3x3Kernel.h | 105 ------ arm_compute/runtime/CL/ICLOperator.h | 4 +- arm_compute/runtime/CL/ICLSimpleFunction.h | 14 +- .../runtime/CL/functions/CLAbsoluteDifference.h | 1 + arm_compute/runtime/CL/functions/CLAccumulate.h | 1 + .../runtime/CL/functions/CLActivationLayer.h | 2 + .../runtime/CL/functions/CLArgMinMaxLayer.h | 26 +- .../CL/functions/CLBatchNormalizationLayer.h | 20 +- .../runtime/CL/functions/CLBatchToSpaceLayer.h | 20 +- arm_compute/runtime/CL/functions/CLBitwiseAnd.h | 1 + arm_compute/runtime/CL/functions/CLBitwiseNot.h | 1 + arm_compute/runtime/CL/functions/CLBitwiseOr.h | 1 + arm_compute/runtime/CL/functions/CLBitwiseXor.h | 1 + .../runtime/CL/functions/CLBoundingBoxTransform.h | 6 +- arm_compute/runtime/CL/functions/CLBox3x3.h | 1 + arm_compute/runtime/CL/functions/CLCannyEdge.h | 37 +- arm_compute/runtime/CL/functions/CLCast.h | 2 + .../runtime/CL/functions/CLChannelCombine.h | 1 + .../runtime/CL/functions/CLChannelExtract.h | 1 + .../runtime/CL/functions/CLChannelShuffleLayer.h | 3 + arm_compute/runtime/CL/functions/CLColorConvert.h | 1 + arm_compute/runtime/CL/functions/CLComparison.h | 2 + .../runtime/CL/functions/CLComputeAllAnchors.h | 5 +- .../runtime/CL/functions/CLConcatenateLayer.h | 3 +- .../CL/functions/CLConvertFullyConnectedWeights.h | 5 +- arm_compute/runtime/CL/functions/CLConvolution.h | 33 +- .../runtime/CL/functions/CLConvolutionLayer.h | 10 + arm_compute/runtime/CL/functions/CLCopy.h | 2 + arm_compute/runtime/CL/functions/CLCropResize.h | 10 +- .../CL/functions/CLDeconvolutionLayerUpsample.h | 19 +- .../runtime/CL/functions/CLDepthConvertLayer.h | 2 + .../runtime/CL/functions/CLDepthToSpaceLayer.h | 2 + .../CL/functions/CLDepthwiseConvolutionLayer.h | 59 +-- .../runtime/CL/functions/CLDequantizationLayer.h | 2 + arm_compute/runtime/CL/functions/CLDerivative.h | 1 + arm_compute/runtime/CL/functions/CLDilate.h | 1 + .../CL/functions/CLDirectConvolutionLayer.h | 
18 +- .../runtime/CL/functions/CLElementWiseUnaryLayer.h | 2 + .../runtime/CL/functions/CLElementwiseOperations.h | 2 + .../runtime/CL/functions/CLEqualizeHistogram.h | 29 +- arm_compute/runtime/CL/functions/CLErode.h | 1 + arm_compute/runtime/CL/functions/CLFFT1D.h | 28 +- arm_compute/runtime/CL/functions/CLFFT2D.h | 6 + arm_compute/runtime/CL/functions/CLFastCorners.h | 29 +- arm_compute/runtime/CL/functions/CLFill.h | 1 + arm_compute/runtime/CL/functions/CLFillBorder.h | 1 + arm_compute/runtime/CL/functions/CLFlattenLayer.h | 2 + arm_compute/runtime/CL/functions/CLFloor.h | 2 + .../runtime/CL/functions/CLFullyConnectedLayer.h | 1 - .../CL/functions/CLFuseBatchNormalization.h | 10 +- arm_compute/runtime/CL/functions/CLGEMM.h | 102 +++--- .../runtime/CL/functions/CLGEMMConvolutionLayer.h | 25 +- .../CL/functions/CLGEMMDeconvolutionLayer.h | 22 +- .../CL/functions/CLGEMMLowpMatrixMultiplyCore.h | 35 +- .../runtime/CL/functions/CLGEMMLowpOutputStage.h | 7 + arm_compute/runtime/CL/functions/CLGather.h | 3 + arm_compute/runtime/CL/functions/CLGaussian3x3.h | 1 + arm_compute/runtime/CL/functions/CLGaussian5x5.h | 26 +- .../runtime/CL/functions/CLGaussianPyramid.h | 28 +- .../CL/functions/CLGenerateProposalsLayer.h | 39 +- arm_compute/runtime/CL/functions/CLHOGDescriptor.h | 23 +- arm_compute/runtime/CL/functions/CLHOGDetector.h | 19 +- arm_compute/runtime/CL/functions/CLHOGGradient.h | 16 +- .../runtime/CL/functions/CLHOGMultiDetection.h | 36 +- arm_compute/runtime/CL/functions/CLHarrisCorners.h | 40 +- arm_compute/runtime/CL/functions/CLHistogram.h | 2 +- .../CL/functions/CLInstanceNormalizationLayer.h | 3 + arm_compute/runtime/CL/functions/CLIntegralImage.h | 16 +- .../runtime/CL/functions/CLL2NormalizeLayer.h | 22 +- arm_compute/runtime/CL/functions/CLLSTMLayer.h | 184 +++++----- .../runtime/CL/functions/CLLocallyConnectedLayer.h | 30 +- arm_compute/runtime/CL/functions/CLMagnitude.h | 1 + .../runtime/CL/functions/CLMaxUnpoolingLayer.h | 21 +- arm_compute/runtime/CL/functions/CLMeanStdDev.h | 39 +- .../CL/functions/CLMeanStdDevNormalizationLayer.h | 2 + arm_compute/runtime/CL/functions/CLMedian3x3.h | 1 + .../runtime/CL/functions/CLMinMaxLocation.h | 28 +- .../runtime/CL/functions/CLNonLinearFilter.h | 1 + .../CL/functions/CLNonMaximaSuppression3x3.h | 1 + .../runtime/CL/functions/CLNormalizationLayer.h | 27 +- .../CL/functions/CLNormalizePlanarYUVLayer.h | 2 + arm_compute/runtime/CL/functions/CLOpticalFlow.h | 41 ++- arm_compute/runtime/CL/functions/CLPReluLayer.h | 3 +- arm_compute/runtime/CL/functions/CLPadLayer.h | 14 +- arm_compute/runtime/CL/functions/CLPermute.h | 2 + arm_compute/runtime/CL/functions/CLPhase.h | 1 + .../CL/functions/CLPixelWiseMultiplication.h | 8 +- arm_compute/runtime/CL/functions/CLPoolingLayer.h | 2 + arm_compute/runtime/CL/functions/CLPriorBoxLayer.h | 5 +- arm_compute/runtime/CL/functions/CLQLSTMLayer.h | 167 ++++----- .../runtime/CL/functions/CLQuantizationLayer.h | 3 + arm_compute/runtime/CL/functions/CLRNNLayer.h | 30 +- arm_compute/runtime/CL/functions/CLROIAlignLayer.h | 4 +- .../runtime/CL/functions/CLROIPoolingLayer.h | 6 +- arm_compute/runtime/CL/functions/CLRange.h | 2 + .../runtime/CL/functions/CLReductionOperation.h | 33 +- arm_compute/runtime/CL/functions/CLRemap.h | 1 + arm_compute/runtime/CL/functions/CLReorgLayer.h | 2 + arm_compute/runtime/CL/functions/CLReshapeLayer.h | 2 + arm_compute/runtime/CL/functions/CLReverse.h | 3 + arm_compute/runtime/CL/functions/CLScale.h | 2 + arm_compute/runtime/CL/functions/CLScharr3x3.h | 1 + 
arm_compute/runtime/CL/functions/CLSelect.h | 5 +- arm_compute/runtime/CL/functions/CLSlice.h | 2 + arm_compute/runtime/CL/functions/CLSobel3x3.h | 9 + arm_compute/runtime/CL/functions/CLSobel5x5.h | 24 +- arm_compute/runtime/CL/functions/CLSobel7x7.h | 24 +- arm_compute/runtime/CL/functions/CLSoftmaxLayer.h | 37 +- .../runtime/CL/functions/CLSpaceToBatchLayer.h | 19 +- .../runtime/CL/functions/CLSpaceToDepthLayer.h | 19 +- arm_compute/runtime/CL/functions/CLStackLayer.h | 21 +- arm_compute/runtime/CL/functions/CLTableLookup.h | 1 + arm_compute/runtime/CL/functions/CLThreshold.h | 1 + arm_compute/runtime/CL/functions/CLTile.h | 5 +- arm_compute/runtime/CL/functions/CLTranspose.h | 3 + arm_compute/runtime/CL/functions/CLUpsampleLayer.h | 12 +- arm_compute/runtime/CL/functions/CLWarpAffine.h | 1 + .../runtime/CL/functions/CLWarpPerspective.h | 1 + .../CL/functions/CLWinogradConvolutionLayer.h | 28 +- .../CL/functions/CLWinogradInputTransform.h | 2 + arm_compute/runtime/CL/functions/CLYOLOLayer.h | 5 +- arm_compute/runtime/IOperator.h | 3 +- docs/04_adding_operator.dox | 10 +- docs/ComputeLibrary.dir | 10 +- examples/cl_cache.cpp | 6 +- examples/cl_convolution.cpp | 5 +- examples/cl_events.cpp | 4 +- examples/cl_sgemm.cpp | 4 +- examples/gemm_tuner/cl_gemm_native.cpp | 3 +- examples/gemm_tuner/cl_gemm_reshaped.cpp | 5 +- examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp | 3 +- examples/neoncl_scale_median_gaussian.cpp | 3 +- src/core/CL/CLKernels.h | 163 +++++++++ src/core/CL/CLTracePoint.cpp | 2 +- src/core/CL/CLValidate.h | 1 + src/core/CL/ICLKernel.cpp | 2 +- src/core/CL/ICLKernel.h | 404 +++++++++++++++++++++ src/core/CL/ICLSimple2DKernel.cpp | 2 +- src/core/CL/ICLSimple2DKernel.h | 41 +++ src/core/CL/ICLSimple3DKernel.cpp | 4 +- src/core/CL/ICLSimple3DKernel.h | 43 +++ src/core/CL/ICLSimpleKernel.cpp | 3 +- src/core/CL/ICLSimpleKernel.h | 66 ++++ src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp | 4 +- src/core/CL/kernels/CLAbsoluteDifferenceKernel.h | 79 ++++ src/core/CL/kernels/CLAccumulateKernel.cpp | 2 +- src/core/CL/kernels/CLAccumulateKernel.h | 114 ++++++ src/core/CL/kernels/CLActivationLayerKernel.cpp | 2 +- src/core/CL/kernels/CLActivationLayerKernel.h | 77 ++++ src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp | 2 +- src/core/CL/kernels/CLArgMinMaxLayerKernel.h | 106 ++++++ .../CL/kernels/CLBatchConcatenateLayerKernel.cpp | 2 +- .../CL/kernels/CLBatchConcatenateLayerKernel.h | 82 +++++ .../CL/kernels/CLBatchNormalizationLayerKernel.cpp | 2 +- .../CL/kernels/CLBatchNormalizationLayerKernel.h | 120 ++++++ src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp | 2 +- src/core/CL/kernels/CLBatchToSpaceLayerKernel.h | 111 ++++++ src/core/CL/kernels/CLBitwiseAndKernel.cpp | 2 +- src/core/CL/kernels/CLBitwiseAndKernel.h | 76 ++++ src/core/CL/kernels/CLBitwiseNotKernel.cpp | 2 +- src/core/CL/kernels/CLBitwiseNotKernel.h | 56 +++ src/core/CL/kernels/CLBitwiseOrKernel.cpp | 2 +- src/core/CL/kernels/CLBitwiseOrKernel.h | 76 ++++ src/core/CL/kernels/CLBitwiseXorKernel.cpp | 2 +- src/core/CL/kernels/CLBitwiseXorKernel.h | 76 ++++ .../CL/kernels/CLBoundingBoxTransformKernel.cpp | 2 +- src/core/CL/kernels/CLBoundingBoxTransformKernel.h | 99 +++++ src/core/CL/kernels/CLBox3x3Kernel.cpp | 2 +- src/core/CL/kernels/CLBox3x3Kernel.h | 59 +++ src/core/CL/kernels/CLCannyEdgeKernel.cpp | 2 +- src/core/CL/kernels/CLCannyEdgeKernel.h | 185 ++++++++++ src/core/CL/kernels/CLChannelCombineKernel.cpp | 2 +- src/core/CL/kernels/CLChannelCombineKernel.h | 102 ++++++ src/core/CL/kernels/CLChannelExtractKernel.cpp | 
2 +- src/core/CL/kernels/CLChannelExtractKernel.h | 95 +++++ .../CL/kernels/CLChannelShuffleLayerKernel.cpp | 2 +- src/core/CL/kernels/CLChannelShuffleLayerKernel.h | 82 +++++ src/core/CL/kernels/CLCol2ImKernel.cpp | 2 +- src/core/CL/kernels/CLCol2ImKernel.h | 106 ++++++ src/core/CL/kernels/CLColorConvertKernel.cpp | 2 +- src/core/CL/kernels/CLColorConvertKernel.h | 121 ++++++ src/core/CL/kernels/CLComparisonKernel.cpp | 2 +- src/core/CL/kernels/CLComparisonKernel.h | 89 +++++ .../CLConvertFullyConnectedWeightsKernel.cpp | 2 +- .../kernels/CLConvertFullyConnectedWeightsKernel.h | 90 +++++ src/core/CL/kernels/CLConvolutionKernel.cpp | 4 +- src/core/CL/kernels/CLConvolutionKernel.h | 224 ++++++++++++ src/core/CL/kernels/CLCopyKernel.cpp | 2 +- src/core/CL/kernels/CLCopyKernel.h | 83 +++++ src/core/CL/kernels/CLCropKernel.cpp | 2 +- src/core/CL/kernels/CLCropKernel.h | 103 ++++++ .../kernels/CLDeconvolutionLayerUpsampleKernel.cpp | 2 +- .../kernels/CLDeconvolutionLayerUpsampleKernel.h | 86 +++++ .../kernels/CLDeconvolutionReshapeOutputKernel.cpp | 2 +- .../kernels/CLDeconvolutionReshapeOutputKernel.h | 106 ++++++ .../CL/kernels/CLDepthConcatenateLayerKernel.cpp | 2 +- .../CL/kernels/CLDepthConcatenateLayerKernel.h | 80 ++++ src/core/CL/kernels/CLDepthConvertLayerKernel.cpp | 2 +- src/core/CL/kernels/CLDepthConvertLayerKernel.h | 91 +++++ src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp | 2 +- src/core/CL/kernels/CLDepthToSpaceLayerKernel.h | 84 +++++ .../CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp | 4 +- .../CLDepthwiseConvolutionLayer3x3NCHWKernel.h | 114 ++++++ .../CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp | 4 +- .../CLDepthwiseConvolutionLayer3x3NHWCKernel.h | 113 ++++++ .../CLDepthwiseConvolutionLayerNativeKernel.cpp | 4 +- .../CLDepthwiseConvolutionLayerNativeKernel.h | 131 +++++++ ...pthwiseConvolutionLayerReshapeWeightsKernel.cpp | 4 +- ...DepthwiseConvolutionLayerReshapeWeightsKernel.h | 85 +++++ .../CL/kernels/CLDequantizationLayerKernel.cpp | 2 +- src/core/CL/kernels/CLDequantizationLayerKernel.h | 79 ++++ src/core/CL/kernels/CLDerivativeKernel.cpp | 2 +- src/core/CL/kernels/CLDerivativeKernel.h | 83 +++++ src/core/CL/kernels/CLDilateKernel.cpp | 2 +- src/core/CL/kernels/CLDilateKernel.h | 59 +++ .../CL/kernels/CLDirectConvolutionLayerKernel.cpp | 2 +- .../CL/kernels/CLDirectConvolutionLayerKernel.h | 125 +++++++ .../CL/kernels/CLElementWiseUnaryLayerKernel.cpp | 2 +- .../CL/kernels/CLElementWiseUnaryLayerKernel.h | 66 ++++ .../CL/kernels/CLElementwiseOperationKernel.cpp | 2 +- src/core/CL/kernels/CLElementwiseOperationKernel.h | 212 +++++++++++ src/core/CL/kernels/CLErodeKernel.cpp | 2 +- src/core/CL/kernels/CLErodeKernel.h | 59 +++ src/core/CL/kernels/CLFFTDigitReverseKernel.cpp | 2 +- src/core/CL/kernels/CLFFTDigitReverseKernel.h | 89 +++++ src/core/CL/kernels/CLFFTRadixStageKernel.cpp | 2 +- src/core/CL/kernels/CLFFTRadixStageKernel.h | 97 +++++ src/core/CL/kernels/CLFFTScaleKernel.cpp | 2 +- src/core/CL/kernels/CLFFTScaleKernel.h | 86 +++++ src/core/CL/kernels/CLFastCornersKernel.cpp | 2 +- src/core/CL/kernels/CLFastCornersKernel.h | 133 +++++++ src/core/CL/kernels/CLFillBorderKernel.cpp | 2 +- src/core/CL/kernels/CLFillBorderKernel.h | 96 +++++ src/core/CL/kernels/CLFlattenLayerKernel.cpp | 2 +- src/core/CL/kernels/CLFlattenLayerKernel.h | 83 +++++ src/core/CL/kernels/CLFloorKernel.cpp | 2 +- src/core/CL/kernels/CLFloorKernel.h | 81 +++++ .../CL/kernels/CLFuseBatchNormalizationKernel.cpp | 2 +- .../CL/kernels/CLFuseBatchNormalizationKernel.h | 126 +++++++ 
.../CLGEMMLowpMatrixMultiplyNativeKernel.cpp | 2 +- .../kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h | 108 ++++++ .../CLGEMMLowpMatrixMultiplyReshapedKernel.cpp | 2 +- .../CLGEMMLowpMatrixMultiplyReshapedKernel.h | 123 +++++++ ...GEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp | 16 +- ...CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h | 155 ++++++++ .../kernels/CLGEMMLowpOffsetContributionKernel.cpp | 2 +- .../kernels/CLGEMMLowpOffsetContributionKernel.h | 116 ++++++ ...GEMMLowpOffsetContributionOutputStageKernel.cpp | 2 +- ...CLGEMMLowpOffsetContributionOutputStageKernel.h | 136 +++++++ ...owpQuantizeDownInt32ScaleByFixedPointKernel.cpp | 2 +- ...MLowpQuantizeDownInt32ScaleByFixedPointKernel.h | 89 +++++ ...GEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp | 2 +- ...CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h | 101 ++++++ .../CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp | 2 +- .../CLGEMMLowpQuantizeDownInt32ScaleKernel.h | 102 ++++++ src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp | 2 +- src/core/CL/kernels/CLGEMMLowpReductionKernel.h | 176 +++++++++ src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp | 2 +- src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h | 122 +++++++ .../kernels/CLGEMMMatrixMultiplyNativeKernel.cpp | 2 +- .../CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h | 127 +++++++ .../kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp | 2 +- .../kernels/CLGEMMMatrixMultiplyReshapedKernel.h | 188 ++++++++++ .../CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp | 6 +- .../CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h | 168 +++++++++ .../kernels/CLGEMMMatrixVectorMultiplyKernel.cpp | 2 +- .../CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h | 84 +++++ .../CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp | 2 +- src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h | 105 ++++++ .../CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp | 2 +- src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h | 135 +++++++ src/core/CL/kernels/CLGatherKernel.cpp | 2 +- src/core/CL/kernels/CLGatherKernel.h | 89 +++++ src/core/CL/kernels/CLGaussian3x3Kernel.cpp | 2 +- src/core/CL/kernels/CLGaussian3x3Kernel.h | 59 +++ src/core/CL/kernels/CLGaussian5x5Kernel.cpp | 2 +- src/core/CL/kernels/CLGaussian5x5Kernel.h | 83 +++++ src/core/CL/kernels/CLGaussianPyramidKernel.cpp | 2 +- src/core/CL/kernels/CLGaussianPyramidKernel.h | 111 ++++++ .../CL/kernels/CLGenerateProposalsLayerKernel.cpp | 2 +- .../CL/kernels/CLGenerateProposalsLayerKernel.h | 85 +++++ src/core/CL/kernels/CLHOGDescriptorKernel.cpp | 2 +- src/core/CL/kernels/CLHOGDescriptorKernel.h | 122 +++++++ src/core/CL/kernels/CLHOGDetectorKernel.cpp | 2 +- src/core/CL/kernels/CLHOGDetectorKernel.h | 96 +++++ src/core/CL/kernels/CLHarrisCornersKernel.cpp | 2 +- src/core/CL/kernels/CLHarrisCornersKernel.h | 100 +++++ .../CL/kernels/CLHeightConcatenateLayerKernel.cpp | 2 +- .../CL/kernels/CLHeightConcatenateLayerKernel.h | 77 ++++ src/core/CL/kernels/CLHistogramKernel.cpp | 2 +- src/core/CL/kernels/CLHistogramKernel.h | 111 ++++++ src/core/CL/kernels/CLIm2ColKernel.cpp | 2 +- src/core/CL/kernels/CLIm2ColKernel.h | 136 +++++++ .../kernels/CLInstanceNormalizationLayerKernel.cpp | 2 +- .../kernels/CLInstanceNormalizationLayerKernel.h | 90 +++++ src/core/CL/kernels/CLIntegralImageKernel.cpp | 2 +- src/core/CL/kernels/CLIntegralImageKernel.h | 86 +++++ src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp | 2 +- src/core/CL/kernels/CLL2NormalizeLayerKernel.h | 100 +++++ src/core/CL/kernels/CLLKTrackerKernel.cpp | 2 +- src/core/CL/kernels/CLLKTrackerKernel.h | 206 +++++++++++ 
.../CLLocallyConnectedMatrixMultiplyKernel.cpp | 2 +- .../CLLocallyConnectedMatrixMultiplyKernel.h | 85 +++++ src/core/CL/kernels/CLMagnitudePhaseKernel.cpp | 2 +- src/core/CL/kernels/CLMagnitudePhaseKernel.h | 90 +++++ src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp | 2 +- src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h | 86 +++++ src/core/CL/kernels/CLMeanStdDevKernel.cpp | 2 +- src/core/CL/kernels/CLMeanStdDevKernel.h | 98 +++++ .../CL/kernels/CLMeanStdDevNormalizationKernel.cpp | 2 +- .../CL/kernels/CLMeanStdDevNormalizationKernel.h | 90 +++++ src/core/CL/kernels/CLMedian3x3Kernel.cpp | 2 +- src/core/CL/kernels/CLMedian3x3Kernel.h | 59 +++ src/core/CL/kernels/CLMemsetKernel.cpp | 2 +- src/core/CL/kernels/CLMemsetKernel.h | 85 +++++ src/core/CL/kernels/CLMinMaxLayerKernel.cpp | 2 +- src/core/CL/kernels/CLMinMaxLayerKernel.h | 87 +++++ src/core/CL/kernels/CLMinMaxLocationKernel.cpp | 2 +- src/core/CL/kernels/CLMinMaxLocationKernel.h | 124 +++++++ src/core/CL/kernels/CLNonLinearFilterKernel.cpp | 2 +- src/core/CL/kernels/CLNonLinearFilterKernel.h | 77 ++++ .../CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp | 2 +- .../CL/kernels/CLNonMaximaSuppression3x3Kernel.h | 60 +++ src/core/CL/kernels/CLNormalizationLayerKernel.cpp | 2 +- src/core/CL/kernels/CLNormalizationLayerKernel.h | 90 +++++ .../CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp | 2 +- .../CL/kernels/CLNormalizePlanarYUVLayerKernel.h | 94 +++++ src/core/CL/kernels/CLPadLayerKernel.cpp | 2 +- src/core/CL/kernels/CLPadLayerKernel.h | 96 +++++ src/core/CL/kernels/CLPermuteKernel.cpp | 2 +- src/core/CL/kernels/CLPermuteKernel.h | 90 +++++ .../CL/kernels/CLPixelWiseMultiplicationKernel.cpp | 2 +- .../CL/kernels/CLPixelWiseMultiplicationKernel.h | 196 ++++++++++ src/core/CL/kernels/CLPoolingLayerKernel.cpp | 6 +- src/core/CL/kernels/CLPoolingLayerKernel.h | 96 +++++ src/core/CL/kernels/CLPriorBoxLayerKernel.cpp | 2 +- src/core/CL/kernels/CLPriorBoxLayerKernel.h | 99 +++++ .../CL/kernels/CLQLSTMLayerNormalizationKernel.cpp | 2 +- .../CL/kernels/CLQLSTMLayerNormalizationKernel.h | 88 +++++ src/core/CL/kernels/CLQuantizationLayerKernel.cpp | 2 +- src/core/CL/kernels/CLQuantizationLayerKernel.h | 86 +++++ src/core/CL/kernels/CLROIAlignLayerKernel.cpp | 2 +- src/core/CL/kernels/CLROIAlignLayerKernel.h | 110 ++++++ src/core/CL/kernels/CLROIPoolingLayerKernel.cpp | 2 +- src/core/CL/kernels/CLROIPoolingLayerKernel.h | 92 +++++ src/core/CL/kernels/CLRangeKernel.cpp | 2 +- src/core/CL/kernels/CLRangeKernel.h | 92 +++++ src/core/CL/kernels/CLReductionOperationKernel.cpp | 2 +- src/core/CL/kernels/CLReductionOperationKernel.h | 99 +++++ src/core/CL/kernels/CLRemapKernel.cpp | 2 +- src/core/CL/kernels/CLRemapKernel.h | 81 +++++ src/core/CL/kernels/CLReorgLayerKernel.cpp | 2 +- src/core/CL/kernels/CLReorgLayerKernel.h | 90 +++++ src/core/CL/kernels/CLReshapeLayerKernel.cpp | 2 +- src/core/CL/kernels/CLReshapeLayerKernel.h | 59 +++ src/core/CL/kernels/CLReverseKernel.cpp | 2 +- src/core/CL/kernels/CLReverseKernel.h | 84 +++++ src/core/CL/kernels/CLScaleKernel.cpp | 4 +- src/core/CL/kernels/CLScaleKernel.h | 93 +++++ src/core/CL/kernels/CLScharr3x3Kernel.cpp | 2 +- src/core/CL/kernels/CLScharr3x3Kernel.h | 97 +++++ src/core/CL/kernels/CLSelectKernel.cpp | 2 +- src/core/CL/kernels/CLSelectKernel.h | 94 +++++ src/core/CL/kernels/CLSobel3x3Kernel.cpp | 2 +- src/core/CL/kernels/CLSobel3x3Kernel.h | 83 +++++ src/core/CL/kernels/CLSobel5x5Kernel.cpp | 2 +- src/core/CL/kernels/CLSobel5x5Kernel.h | 139 +++++++ src/core/CL/kernels/CLSobel7x7Kernel.cpp | 2 +- 
src/core/CL/kernels/CLSobel7x7Kernel.h | 139 +++++++ src/core/CL/kernels/CLSoftmaxLayerKernel.cpp | 2 +- src/core/CL/kernels/CLSoftmaxLayerKernel.h | 158 ++++++++ src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp | 2 +- src/core/CL/kernels/CLSpaceToBatchLayerKernel.h | 122 +++++++ src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp | 2 +- src/core/CL/kernels/CLSpaceToDepthLayerKernel.h | 84 +++++ src/core/CL/kernels/CLStackLayerKernel.cpp | 2 +- src/core/CL/kernels/CLStackLayerKernel.h | 101 ++++++ src/core/CL/kernels/CLStridedSliceKernel.cpp | 2 +- src/core/CL/kernels/CLStridedSliceKernel.h | 79 ++++ src/core/CL/kernels/CLTableLookupKernel.cpp | 2 +- src/core/CL/kernels/CLTableLookupKernel.h | 55 +++ src/core/CL/kernels/CLThresholdKernel.cpp | 2 +- src/core/CL/kernels/CLThresholdKernel.h | 57 +++ src/core/CL/kernels/CLTileKernel.cpp | 2 +- src/core/CL/kernels/CLTileKernel.h | 88 +++++ src/core/CL/kernels/CLTransposeKernel.cpp | 2 +- src/core/CL/kernels/CLTransposeKernel.h | 64 ++++ src/core/CL/kernels/CLUpsampleLayerKernel.cpp | 2 +- src/core/CL/kernels/CLUpsampleLayerKernel.h | 89 +++++ src/core/CL/kernels/CLWarpAffineKernel.cpp | 2 +- src/core/CL/kernels/CLWarpAffineKernel.h | 62 ++++ src/core/CL/kernels/CLWarpPerspectiveKernel.cpp | 2 +- src/core/CL/kernels/CLWarpPerspectiveKernel.h | 59 +++ src/core/CL/kernels/CLWeightsReshapeKernel.cpp | 2 +- src/core/CL/kernels/CLWeightsReshapeKernel.h | 121 ++++++ .../kernels/CLWidthConcatenate2TensorsKernel.cpp | 2 +- .../CL/kernels/CLWidthConcatenate2TensorsKernel.h | 73 ++++ .../kernels/CLWidthConcatenate4TensorsKernel.cpp | 2 +- .../CL/kernels/CLWidthConcatenate4TensorsKernel.h | 77 ++++ .../CL/kernels/CLWidthConcatenateLayerKernel.cpp | 2 +- .../CL/kernels/CLWidthConcatenateLayerKernel.h | 74 ++++ .../CL/kernels/CLWinogradFilterTransformKernel.cpp | 4 +- .../CL/kernels/CLWinogradFilterTransformKernel.h | 115 ++++++ .../CL/kernels/CLWinogradInputTransformKernel.cpp | 2 +- .../CL/kernels/CLWinogradInputTransformKernel.h | 121 ++++++ .../CL/kernels/CLWinogradOutputTransformKernel.cpp | 2 +- .../CL/kernels/CLWinogradOutputTransformKernel.h | 127 +++++++ src/core/CL/kernels/CLYOLOLayerKernel.cpp | 2 +- src/core/CL/kernels/CLYOLOLayerKernel.h | 98 +++++ .../ICLDepthwiseConvolutionLayer3x3Kernel.h | 105 ++++++ src/graph/backends/CL/CLFunctionsFactory.cpp | 1 + src/graph/backends/CL/CLNodeValidator.cpp | 11 + src/runtime/CL/CLOperator.cpp | 2 + src/runtime/CL/CLScheduler.cpp | 2 +- src/runtime/CL/CLTuner.cpp | 2 +- src/runtime/CL/ICLSimpleFunction.cpp | 11 +- src/runtime/CL/functions/CLAbsoluteDifference.cpp | 2 +- src/runtime/CL/functions/CLAccumulate.cpp | 2 +- src/runtime/CL/functions/CLActivationLayer.cpp | 2 +- src/runtime/CL/functions/CLArgMinMaxLayer.cpp | 22 +- .../CL/functions/CLBatchNormalizationLayer.cpp | 14 +- src/runtime/CL/functions/CLBatchToSpaceLayer.cpp | 16 +- src/runtime/CL/functions/CLBitwiseAnd.cpp | 2 +- src/runtime/CL/functions/CLBitwiseNot.cpp | 2 +- src/runtime/CL/functions/CLBitwiseOr.cpp | 2 +- src/runtime/CL/functions/CLBitwiseXor.cpp | 2 +- .../CL/functions/CLBoundingBoxTransform.cpp | 2 +- src/runtime/CL/functions/CLBox3x3.cpp | 5 +- src/runtime/CL/functions/CLCannyEdge.cpp | 30 +- src/runtime/CL/functions/CLCast.cpp | 2 +- src/runtime/CL/functions/CLChannelCombine.cpp | 2 +- src/runtime/CL/functions/CLChannelExtract.cpp | 2 +- src/runtime/CL/functions/CLChannelShuffleLayer.cpp | 2 +- src/runtime/CL/functions/CLColorConvert.cpp | 2 +- src/runtime/CL/functions/CLComparison.cpp | 7 +- 
src/runtime/CL/functions/CLComputeAllAnchors.cpp | 1 + src/runtime/CL/functions/CLConcatenateLayer.cpp | 12 +- .../functions/CLConvertFullyConnectedWeights.cpp | 2 + src/runtime/CL/functions/CLConvolution.cpp | 32 +- src/runtime/CL/functions/CLConvolutionLayer.cpp | 3 +- src/runtime/CL/functions/CLCopy.cpp | 2 +- src/runtime/CL/functions/CLCropResize.cpp | 6 + src/runtime/CL/functions/CLDeconvolutionLayer.cpp | 1 - .../CL/functions/CLDeconvolutionLayerUpsample.cpp | 17 +- src/runtime/CL/functions/CLDepthConvertLayer.cpp | 2 +- src/runtime/CL/functions/CLDepthToSpaceLayer.cpp | 2 +- .../CL/functions/CLDepthwiseConvolutionLayer.cpp | 52 +-- src/runtime/CL/functions/CLDequantizationLayer.cpp | 2 +- src/runtime/CL/functions/CLDerivative.cpp | 5 +- src/runtime/CL/functions/CLDilate.cpp | 5 +- .../CL/functions/CLDirectConvolutionLayer.cpp | 21 +- .../CL/functions/CLDirectDeconvolutionLayer.cpp | 6 + .../CL/functions/CLElementWiseUnaryLayer.cpp | 2 +- .../CL/functions/CLElementwiseOperations.cpp | 2 +- src/runtime/CL/functions/CLEqualizeHistogram.cpp | 24 +- src/runtime/CL/functions/CLErode.cpp | 5 +- src/runtime/CL/functions/CLFFT1D.cpp | 30 +- src/runtime/CL/functions/CLFFT2D.cpp | 5 + src/runtime/CL/functions/CLFFTConvolutionLayer.cpp | 7 + src/runtime/CL/functions/CLFastCorners.cpp | 20 +- src/runtime/CL/functions/CLFill.cpp | 2 +- src/runtime/CL/functions/CLFillBorder.cpp | 2 +- src/runtime/CL/functions/CLFlattenLayer.cpp | 2 +- src/runtime/CL/functions/CLFloor.cpp | 2 +- src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 13 + .../CL/functions/CLFuseBatchNormalization.cpp | 10 +- src/runtime/CL/functions/CLGEMM.cpp | 142 +++++--- .../CL/functions/CLGEMMConvolutionLayer.cpp | 47 ++- .../CL/functions/CLGEMMDeconvolutionLayer.cpp | 25 +- .../CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp | 80 ++-- src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp | 9 +- src/runtime/CL/functions/CLGather.cpp | 2 +- src/runtime/CL/functions/CLGaussian3x3.cpp | 5 +- src/runtime/CL/functions/CLGaussian5x5.cpp | 24 +- src/runtime/CL/functions/CLGaussianPyramid.cpp | 47 ++- .../CL/functions/CLGenerateProposalsLayer.cpp | 57 +-- src/runtime/CL/functions/CLHOGDescriptor.cpp | 22 +- src/runtime/CL/functions/CLHOGDetector.cpp | 11 +- src/runtime/CL/functions/CLHOGGradient.cpp | 15 +- src/runtime/CL/functions/CLHOGMultiDetection.cpp | 21 +- src/runtime/CL/functions/CLHarrisCorners.cpp | 26 +- .../CL/functions/CLInstanceNormalizationLayer.cpp | 4 +- src/runtime/CL/functions/CLIntegralImage.cpp | 16 +- src/runtime/CL/functions/CLL2NormalizeLayer.cpp | 16 +- src/runtime/CL/functions/CLLSTMLayer.cpp | 57 ++- src/runtime/CL/functions/CLLSTMLayerQuantized.cpp | 8 + src/runtime/CL/functions/CLLaplacianPyramid.cpp | 3 + .../CL/functions/CLLaplacianReconstruct.cpp | 2 + .../CL/functions/CLLocallyConnectedLayer.cpp | 35 +- src/runtime/CL/functions/CLMagnitude.cpp | 2 +- src/runtime/CL/functions/CLMaxUnpoolingLayer.cpp | 17 +- src/runtime/CL/functions/CLMeanStdDev.cpp | 18 +- .../functions/CLMeanStdDevNormalizationLayer.cpp | 2 +- src/runtime/CL/functions/CLMedian3x3.cpp | 5 +- src/runtime/CL/functions/CLMinMaxLocation.cpp | 17 +- src/runtime/CL/functions/CLNonLinearFilter.cpp | 5 +- .../CL/functions/CLNonMaximaSuppression3x3.cpp | 7 +- src/runtime/CL/functions/CLNormalizationLayer.cpp | 17 +- .../CL/functions/CLNormalizePlanarYUVLayer.cpp | 2 +- src/runtime/CL/functions/CLOpticalFlow.cpp | 38 +- src/runtime/CL/functions/CLPReluLayer.cpp | 2 +- src/runtime/CL/functions/CLPadLayer.cpp | 17 +- src/runtime/CL/functions/CLPermute.cpp 
| 2 +- src/runtime/CL/functions/CLPhase.cpp | 2 +- .../CL/functions/CLPixelWiseMultiplication.cpp | 15 +- src/runtime/CL/functions/CLPoolingLayer.cpp | 5 +- src/runtime/CL/functions/CLPriorBoxLayer.cpp | 4 +- src/runtime/CL/functions/CLQLSTMLayer.cpp | 91 ++++- src/runtime/CL/functions/CLQuantizationLayer.cpp | 2 +- src/runtime/CL/functions/CLRNNLayer.cpp | 24 +- src/runtime/CL/functions/CLROIAlignLayer.cpp | 3 +- src/runtime/CL/functions/CLROIPoolingLayer.cpp | 4 +- src/runtime/CL/functions/CLRange.cpp | 2 +- src/runtime/CL/functions/CLReduceMean.cpp | 3 +- src/runtime/CL/functions/CLReductionOperation.cpp | 42 ++- src/runtime/CL/functions/CLRemap.cpp | 5 +- src/runtime/CL/functions/CLReorgLayer.cpp | 2 +- src/runtime/CL/functions/CLReshapeLayer.cpp | 2 +- src/runtime/CL/functions/CLReverse.cpp | 2 +- src/runtime/CL/functions/CLScale.cpp | 5 +- src/runtime/CL/functions/CLScharr3x3.cpp | 5 +- src/runtime/CL/functions/CLSelect.cpp | 2 +- src/runtime/CL/functions/CLSlice.cpp | 2 +- src/runtime/CL/functions/CLSobel3x3.cpp | 7 +- src/runtime/CL/functions/CLSobel5x5.cpp | 33 +- src/runtime/CL/functions/CLSobel7x7.cpp | 33 +- src/runtime/CL/functions/CLSoftmaxLayer.cpp | 30 +- src/runtime/CL/functions/CLSpaceToBatchLayer.cpp | 21 +- src/runtime/CL/functions/CLSpaceToDepthLayer.cpp | 10 +- src/runtime/CL/functions/CLStackLayer.cpp | 11 +- src/runtime/CL/functions/CLStridedSlice.cpp | 2 +- src/runtime/CL/functions/CLTableLookup.cpp | 2 +- src/runtime/CL/functions/CLThreshold.cpp | 2 +- src/runtime/CL/functions/CLTile.cpp | 2 +- src/runtime/CL/functions/CLTranspose.cpp | 2 +- src/runtime/CL/functions/CLUpsampleLayer.cpp | 10 +- src/runtime/CL/functions/CLWarpAffine.cpp | 5 +- src/runtime/CL/functions/CLWarpPerspective.cpp | 5 +- .../CL/functions/CLWinogradConvolutionLayer.cpp | 23 +- .../CL/functions/CLWinogradInputTransform.cpp | 5 +- src/runtime/CL/functions/CLYOLOLayer.cpp | 2 +- src/runtime/CL/tuners/BifrostTuner.cpp | 2 +- src/runtime/CL/tuners/MidgardTuner.cpp | 2 +- tests/CL/Helper.h | 10 +- tests/benchmark/CL/Scale.cpp | 3 +- tests/validate_examples/cl_gemm.cpp | 19 +- tests/validation/CL/ArgMinMax.cpp | 6 +- tests/validation/CL/BatchNormalizationLayer.cpp | 22 +- tests/validation/CL/BatchToSpaceLayer.cpp | 2 +- tests/validation/CL/BoundingBoxTransform.cpp | 2 +- tests/validation/CL/CannyEdge.cpp | 5 +- tests/validation/CL/ChannelCombine.cpp | 2 +- tests/validation/CL/ChannelExtract.cpp | 2 +- tests/validation/CL/Col2Im.cpp | 3 +- .../validation/CL/ConvertFullyConnectedWeights.cpp | 2 +- tests/validation/CL/CropResize.cpp | 1 - tests/validation/CL/DeconvolutionLayer.cpp | 1 - tests/validation/CL/DepthToSpaceLayer.cpp | 2 +- .../CL/DepthwiseConvolutionLayerNative.cpp | 2 +- tests/validation/CL/Fill.cpp | 2 +- tests/validation/CL/FillBorder.cpp | 4 +- tests/validation/CL/Flatten.cpp | 2 +- tests/validation/CL/Floor.cpp | 2 +- tests/validation/CL/FuseBatchNormalization.cpp | 2 +- tests/validation/CL/GEMM.cpp | 2 +- .../validation/CL/GEMMLowpMatrixMultiplyNative.cpp | 2 +- .../CL/GEMMLowpMatrixMultiplyReshaped.cpp | 6 +- .../CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp | 4 +- tests/validation/CL/GEMMMatrixMultiply.cpp | 2 +- .../CL/GEMMMatrixMultiplyInterleavedTransposed.cpp | 6 +- tests/validation/CL/GEMMMatrixMultiplyNative.cpp | 2 +- tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp | 6 +- .../CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp | 4 +- tests/validation/CL/GEMMReshapeLHSMatrix.cpp | 2 +- tests/validation/CL/GEMMReshapeRHSMatrix.cpp | 2 +- tests/validation/CL/Gather.cpp | 1 - 
tests/validation/CL/GlobalPoolingLayer.cpp | 2 +- tests/validation/CL/HOGDescriptor.cpp | 2 +- tests/validation/CL/HOGDetector.cpp | 2 +- tests/validation/CL/HOGMultiDetection.cpp | 2 +- tests/validation/CL/Im2Col.cpp | 3 +- tests/validation/CL/InstanceNormalizationLayer.cpp | 2 +- tests/validation/CL/L2NormalizeLayer.cpp | 2 +- tests/validation/CL/LSTMLayerQuantized.cpp | 3 +- tests/validation/CL/LogSoftmaxLayer.cpp | 1 - .../validation/CL/MeanStdDevNormalizationLayer.cpp | 2 +- tests/validation/CL/NormalizationLayer.cpp | 2 +- tests/validation/CL/OpticalFlow.cpp | 2 +- tests/validation/CL/PriorBoxLayer.cpp | 2 +- tests/validation/CL/QLSTMLayerNormalization.cpp | 2 +- tests/validation/CL/RNNLayer.cpp | 2 +- tests/validation/CL/Range.cpp | 2 +- tests/validation/CL/ReduceMean.cpp | 1 - tests/validation/CL/Reverse.cpp | 2 +- tests/validation/CL/SoftmaxLayer.cpp | 1 - tests/validation/CL/SpaceToBatchLayer.cpp | 2 +- tests/validation/CL/SpaceToDepthLayer.cpp | 2 +- tests/validation/CL/TableLookup.cpp | 2 - tests/validation/CL/Tile.cpp | 2 +- tests/validation/CL/UNIT/DynamicTensor.cpp | 6 + tests/validation/CL/UNIT/Tuner.cpp | 2 +- tests/validation/CL/UNIT/WeightsRetention.cpp | 12 + tests/validation/CL/WeightsReshape.cpp | 2 +- tests/validation/CL/Winograd.cpp | 4 +- tests/validation/CL/YOLOLayer.cpp | 2 +- 745 files changed, 16819 insertions(+), 15787 deletions(-) delete mode 100644 arm_compute/core/CL/CLKernels.h delete mode 100644 arm_compute/core/CL/ICLKernel.h delete mode 100644 arm_compute/core/CL/ICLSimple2DKernel.h delete mode 100644 arm_compute/core/CL/ICLSimple3DKernel.h delete mode 100644 arm_compute/core/CL/ICLSimpleKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLAccumulateKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLActivationLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLBitwiseAndKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLBitwiseNotKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLBitwiseOrKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLBitwiseXorKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLBox3x3Kernel.h delete mode 100644 arm_compute/core/CL/kernels/CLCannyEdgeKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLChannelCombineKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLChannelExtractKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLCol2ImKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLColorConvertKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLComparisonKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLConvolutionKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLCopyKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLCropKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h delete mode 100644 
arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLDerivativeKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLDilateKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLErodeKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLFFTScaleKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLFastCornersKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLFillBorderKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLFlattenLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLFloorKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLGatherKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h delete mode 100644 arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h delete mode 100644 arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLHOGDetectorKernel.h delete mode 100644 
arm_compute/core/CL/kernels/CLHarrisCornersKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLHistogramKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLIm2ColKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLIntegralImageKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLLKTrackerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLMeanStdDevKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLMedian3x3Kernel.h delete mode 100644 arm_compute/core/CL/kernels/CLMemsetKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h delete mode 100644 arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLPadLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLPermuteKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLPoolingLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLRangeKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLReductionOperationKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLRemapKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLReorgLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLReshapeLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLReverseKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLScaleKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLScharr3x3Kernel.h delete mode 100644 arm_compute/core/CL/kernels/CLSelectKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLSobel3x3Kernel.h delete mode 100644 arm_compute/core/CL/kernels/CLSobel5x5Kernel.h delete mode 100644 arm_compute/core/CL/kernels/CLSobel7x7Kernel.h delete mode 100644 arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLStackLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLStridedSliceKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLTableLookupKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLThresholdKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLTileKernel.h delete mode 100644 
arm_compute/core/CL/kernels/CLTransposeKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLWarpAffineKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLYOLOLayerKernel.h delete mode 100644 arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h create mode 100644 src/core/CL/CLKernels.h create mode 100644 src/core/CL/ICLKernel.h create mode 100644 src/core/CL/ICLSimple2DKernel.h create mode 100644 src/core/CL/ICLSimple3DKernel.h create mode 100644 src/core/CL/ICLSimpleKernel.h create mode 100644 src/core/CL/kernels/CLAbsoluteDifferenceKernel.h create mode 100644 src/core/CL/kernels/CLAccumulateKernel.h create mode 100644 src/core/CL/kernels/CLActivationLayerKernel.h create mode 100644 src/core/CL/kernels/CLArgMinMaxLayerKernel.h create mode 100644 src/core/CL/kernels/CLBatchConcatenateLayerKernel.h create mode 100644 src/core/CL/kernels/CLBatchNormalizationLayerKernel.h create mode 100644 src/core/CL/kernels/CLBatchToSpaceLayerKernel.h create mode 100644 src/core/CL/kernels/CLBitwiseAndKernel.h create mode 100644 src/core/CL/kernels/CLBitwiseNotKernel.h create mode 100644 src/core/CL/kernels/CLBitwiseOrKernel.h create mode 100644 src/core/CL/kernels/CLBitwiseXorKernel.h create mode 100644 src/core/CL/kernels/CLBoundingBoxTransformKernel.h create mode 100644 src/core/CL/kernels/CLBox3x3Kernel.h create mode 100644 src/core/CL/kernels/CLCannyEdgeKernel.h create mode 100644 src/core/CL/kernels/CLChannelCombineKernel.h create mode 100644 src/core/CL/kernels/CLChannelExtractKernel.h create mode 100644 src/core/CL/kernels/CLChannelShuffleLayerKernel.h create mode 100644 src/core/CL/kernels/CLCol2ImKernel.h create mode 100644 src/core/CL/kernels/CLColorConvertKernel.h create mode 100644 src/core/CL/kernels/CLComparisonKernel.h create mode 100644 src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h create mode 100644 src/core/CL/kernels/CLConvolutionKernel.h create mode 100644 src/core/CL/kernels/CLCopyKernel.h create mode 100644 src/core/CL/kernels/CLCropKernel.h create mode 100644 src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h create mode 100644 src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h create mode 100644 src/core/CL/kernels/CLDepthConcatenateLayerKernel.h create mode 100644 src/core/CL/kernels/CLDepthConvertLayerKernel.h create mode 100644 src/core/CL/kernels/CLDepthToSpaceLayerKernel.h create mode 100644 src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h create mode 100644 src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h create mode 100644 src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h create mode 100644 src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h create mode 100644 src/core/CL/kernels/CLDequantizationLayerKernel.h create mode 100644 src/core/CL/kernels/CLDerivativeKernel.h create mode 
100644 src/core/CL/kernels/CLDilateKernel.h create mode 100644 src/core/CL/kernels/CLDirectConvolutionLayerKernel.h create mode 100644 src/core/CL/kernels/CLElementWiseUnaryLayerKernel.h create mode 100644 src/core/CL/kernels/CLElementwiseOperationKernel.h create mode 100644 src/core/CL/kernels/CLErodeKernel.h create mode 100644 src/core/CL/kernels/CLFFTDigitReverseKernel.h create mode 100644 src/core/CL/kernels/CLFFTRadixStageKernel.h create mode 100644 src/core/CL/kernels/CLFFTScaleKernel.h create mode 100644 src/core/CL/kernels/CLFastCornersKernel.h create mode 100644 src/core/CL/kernels/CLFillBorderKernel.h create mode 100644 src/core/CL/kernels/CLFlattenLayerKernel.h create mode 100644 src/core/CL/kernels/CLFloorKernel.h create mode 100644 src/core/CL/kernels/CLFuseBatchNormalizationKernel.h create mode 100644 src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h create mode 100644 src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h create mode 100644 src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h create mode 100644 src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h create mode 100644 src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h create mode 100644 src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h create mode 100644 src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h create mode 100644 src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h create mode 100644 src/core/CL/kernels/CLGEMMLowpReductionKernel.h create mode 100644 src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h create mode 100644 src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h create mode 100644 src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h create mode 100644 src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h create mode 100644 src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h create mode 100644 src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h create mode 100644 src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h create mode 100644 src/core/CL/kernels/CLGatherKernel.h create mode 100644 src/core/CL/kernels/CLGaussian3x3Kernel.h create mode 100644 src/core/CL/kernels/CLGaussian5x5Kernel.h create mode 100644 src/core/CL/kernels/CLGaussianPyramidKernel.h create mode 100644 src/core/CL/kernels/CLGenerateProposalsLayerKernel.h create mode 100644 src/core/CL/kernels/CLHOGDescriptorKernel.h create mode 100644 src/core/CL/kernels/CLHOGDetectorKernel.h create mode 100644 src/core/CL/kernels/CLHarrisCornersKernel.h create mode 100644 src/core/CL/kernels/CLHeightConcatenateLayerKernel.h create mode 100644 src/core/CL/kernels/CLHistogramKernel.h create mode 100644 src/core/CL/kernels/CLIm2ColKernel.h create mode 100644 src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h create mode 100644 src/core/CL/kernels/CLIntegralImageKernel.h create mode 100644 src/core/CL/kernels/CLL2NormalizeLayerKernel.h create mode 100644 src/core/CL/kernels/CLLKTrackerKernel.h create mode 100644 src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h create mode 100644 src/core/CL/kernels/CLMagnitudePhaseKernel.h create mode 100644 src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h create mode 100644 src/core/CL/kernels/CLMeanStdDevKernel.h create mode 100644 src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h create mode 100644 src/core/CL/kernels/CLMedian3x3Kernel.h create mode 100644 src/core/CL/kernels/CLMemsetKernel.h create mode 100644 src/core/CL/kernels/CLMinMaxLayerKernel.h create mode 100644 
src/core/CL/kernels/CLMinMaxLocationKernel.h create mode 100644 src/core/CL/kernels/CLNonLinearFilterKernel.h create mode 100644 src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h create mode 100644 src/core/CL/kernels/CLNormalizationLayerKernel.h create mode 100644 src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h create mode 100644 src/core/CL/kernels/CLPadLayerKernel.h create mode 100644 src/core/CL/kernels/CLPermuteKernel.h create mode 100644 src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h create mode 100644 src/core/CL/kernels/CLPoolingLayerKernel.h create mode 100644 src/core/CL/kernels/CLPriorBoxLayerKernel.h create mode 100644 src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h create mode 100644 src/core/CL/kernels/CLQuantizationLayerKernel.h create mode 100644 src/core/CL/kernels/CLROIAlignLayerKernel.h create mode 100644 src/core/CL/kernels/CLROIPoolingLayerKernel.h create mode 100644 src/core/CL/kernels/CLRangeKernel.h create mode 100644 src/core/CL/kernels/CLReductionOperationKernel.h create mode 100644 src/core/CL/kernels/CLRemapKernel.h create mode 100644 src/core/CL/kernels/CLReorgLayerKernel.h create mode 100644 src/core/CL/kernels/CLReshapeLayerKernel.h create mode 100644 src/core/CL/kernels/CLReverseKernel.h create mode 100644 src/core/CL/kernels/CLScaleKernel.h create mode 100644 src/core/CL/kernels/CLScharr3x3Kernel.h create mode 100644 src/core/CL/kernels/CLSelectKernel.h create mode 100644 src/core/CL/kernels/CLSobel3x3Kernel.h create mode 100644 src/core/CL/kernels/CLSobel5x5Kernel.h create mode 100644 src/core/CL/kernels/CLSobel7x7Kernel.h create mode 100644 src/core/CL/kernels/CLSoftmaxLayerKernel.h create mode 100644 src/core/CL/kernels/CLSpaceToBatchLayerKernel.h create mode 100644 src/core/CL/kernels/CLSpaceToDepthLayerKernel.h create mode 100644 src/core/CL/kernels/CLStackLayerKernel.h create mode 100644 src/core/CL/kernels/CLStridedSliceKernel.h create mode 100644 src/core/CL/kernels/CLTableLookupKernel.h create mode 100644 src/core/CL/kernels/CLThresholdKernel.h create mode 100644 src/core/CL/kernels/CLTileKernel.h create mode 100644 src/core/CL/kernels/CLTransposeKernel.h create mode 100644 src/core/CL/kernels/CLUpsampleLayerKernel.h create mode 100644 src/core/CL/kernels/CLWarpAffineKernel.h create mode 100644 src/core/CL/kernels/CLWarpPerspectiveKernel.h create mode 100644 src/core/CL/kernels/CLWeightsReshapeKernel.h create mode 100644 src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h create mode 100644 src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h create mode 100644 src/core/CL/kernels/CLWidthConcatenateLayerKernel.h create mode 100644 src/core/CL/kernels/CLWinogradFilterTransformKernel.h create mode 100644 src/core/CL/kernels/CLWinogradInputTransformKernel.h create mode 100644 src/core/CL/kernels/CLWinogradOutputTransformKernel.h create mode 100644 src/core/CL/kernels/CLYOLOLayerKernel.h create mode 100644 src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h diff --git a/3rdparty b/3rdparty index ff15a53098..ba65985c4a 160000 --- a/3rdparty +++ b/3rdparty @@ -1 +1 @@ -Subproject commit ff15a53098d2545808e2c840ad0d5bace6ae87a2 +Subproject commit ba65985c4a47effae4620b95b158ecae8764d2e2 diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h deleted file mode 100644 index 231534fe50..0000000000 --- a/arm_compute/core/CL/CLKernels.h +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLKERNELS_H -#define ARM_COMPUTE_CLKERNELS_H - -/* Header regrouping all the CL kernels */ -#include "arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h" -#include "arm_compute/core/CL/kernels/CLAccumulateKernel.h" -#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h" -#include "arm_compute/core/CL/kernels/CLBitwiseNotKernel.h" -#include "arm_compute/core/CL/kernels/CLBitwiseOrKernel.h" -#include "arm_compute/core/CL/kernels/CLBitwiseXorKernel.h" -#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h" -#include "arm_compute/core/CL/kernels/CLBox3x3Kernel.h" -#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h" -#include "arm_compute/core/CL/kernels/CLChannelCombineKernel.h" -#include "arm_compute/core/CL/kernels/CLChannelExtractKernel.h" -#include "arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h" -#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h" -#include "arm_compute/core/CL/kernels/CLComparisonKernel.h" -#include "arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h" -#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h" -#include "arm_compute/core/CL/kernels/CLCopyKernel.h" -#include "arm_compute/core/CL/kernels/CLCropKernel.h" -#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h" -#include "arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h" -#include "arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h" 
-#include "arm_compute/core/CL/kernels/CLDerivativeKernel.h" -#include "arm_compute/core/CL/kernels/CLDilateKernel.h" -#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h" -#include "arm_compute/core/CL/kernels/CLErodeKernel.h" -#include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h" -#include "arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h" -#include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h" -#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h" -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" -#include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLFloorKernel.h" -#include "arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" -#include "arm_compute/core/CL/kernels/CLGatherKernel.h" -#include "arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h" -#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h" -#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h" -#include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h" -#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h" -#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h" -#include "arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLHistogramKernel.h" -#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h" -#include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h" -#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h" -#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h" -#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h" -#include "arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h" -#include "arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h" -#include "arm_compute/core/CL/kernels/CLMedian3x3Kernel.h" -#include 
"arm_compute/core/CL/kernels/CLMemsetKernel.h" -#include "arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h" -#include "arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h" -#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h" -#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLPermuteKernel.h" -#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h" -#include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h" -#include "arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLRangeKernel.h" -#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h" -#include "arm_compute/core/CL/kernels/CLRemapKernel.h" -#include "arm_compute/core/CL/kernels/CLReorgLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLReverseKernel.h" -#include "arm_compute/core/CL/kernels/CLScaleKernel.h" -#include "arm_compute/core/CL/kernels/CLScharr3x3Kernel.h" -#include "arm_compute/core/CL/kernels/CLSelectKernel.h" -#include "arm_compute/core/CL/kernels/CLSobel3x3Kernel.h" -#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h" -#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h" -#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLStackLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h" -#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h" -#include "arm_compute/core/CL/kernels/CLThresholdKernel.h" -#include "arm_compute/core/CL/kernels/CLTileKernel.h" -#include "arm_compute/core/CL/kernels/CLTransposeKernel.h" -#include "arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h" -#include "arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h" -#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h" -#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" -#include "arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" -#include "arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h" -#include "arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h" -#include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h" -#include "arm_compute/core/CL/kernels/CLYOLOLayerKernel.h" -#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h" - -#endif /* ARM_COMPUTE_CLKERNELS_H */ diff --git a/arm_compute/core/CL/CLTypes.h b/arm_compute/core/CL/CLTypes.h index c44e2c4f3f..0f6eb0dfa4 100644 --- a/arm_compute/core/CL/CLTypes.h +++ b/arm_compute/core/CL/CLTypes.h @@ -75,5 +75,40 @@ struct CLQuantization const ICLFloatArray *scale; /**< Quantization scale array */ const ICLInt32Array *offset; /**< Quantization offset array */ }; + 
+/** Internal keypoint structure for Lucas-Kanade Optical Flow */
+struct CLLKInternalKeypoint
+{
+    float x{ 0.f };               /**< x coordinate of the keypoint */
+    float y{ 0.f };               /**< y coordinate of the keypoint */
+    float tracking_status{ 0.f }; /**< the tracking status of the keypoint */
+    float dummy{ 0.f };           /**< Dummy field, to make sure the data structure is 128-bit aligned, so that the GPU can use vload4 */
+};
+
+/** Structure for storing Spatial Gradient Matrix and the minimum eigenvalue for each keypoint */
+struct CLCoefficientTable
+{
+    float A11;     /**< iA11 * FLT_SCALE */
+    float A12;     /**< iA12 * FLT_SCALE */
+    float A22;     /**< iA22 * FLT_SCALE */
+    float min_eig; /**< Minimum eigenvalue */
+};
+
+/** Structure for storing ival, ixval and iyval for each point inside the window */
+struct CLOldValue
+{
+    int16_t ival;  /**< ival extracted from the old image */
+    int16_t ixval; /**< ixval extracted from the Scharr Gx image */
+    int16_t iyval; /**< iyval extracted from the Scharr Gy image */
+    int16_t dummy; /**< Dummy field, to make sure the data structure is 128-bit aligned, so that the GPU can use vload4 */
+};
+
+/** Interface for OpenCL Array of Internal Key Points. */
+using ICLLKInternalKeypointArray = ICLArray<CLLKInternalKeypoint>;
+/** Interface for OpenCL Array of Coefficient Tables. */
+using ICLCoefficientTableArray = ICLArray<CLCoefficientTable>;
+/** Interface for OpenCL Array of Old Values. */
+using ICLOldValArray = ICLArray<CLOldValue>;
+
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_CL_TYPES_H */
diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h
deleted file mode 100644
index a24cd8c798..0000000000
--- a/arm_compute/core/CL/ICLKernel.h
+++ /dev/null
@@ -1,404 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICLKERNEL_H
-#define ARM_COMPUTE_ICLKERNEL_H
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLTypes.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/GPUTarget.h"
-#include "arm_compute/core/IKernel.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/experimental/Types.h"
-
-#include <string>
-
-namespace arm_compute
-{
-template <typename T>
-class ICLArray;
-class ICLTensor;
-class Window;
-
-/** Common interface for all the OpenCL kernels */
-class ICLKernel : public IKernel
-{
-private:
-    /** Returns the number of arguments enqueued per array object.
-     *
-     * @return The number of arguments enqueued per array object.
- */ - template - constexpr static unsigned int num_arguments_per_array() - { - return num_arguments_per_tensor(); - } - /** Returns the number of arguments enqueued per tensor object. - * - * @return The number of arguments enqueued per tensor object. - */ - template - constexpr static unsigned int num_arguments_per_tensor() - { - return 2 + 2 * dimension_size; - } - using IKernel::configure; //Prevent children from calling IKernel::configure() directly -protected: - /** Configure the kernel's window and local workgroup size hint. - * - * @param[in] window The maximum window which will be returned by window() - * @param[in] lws_hint (Optional) Local-Workgroup-Size to use. - */ - void configure_internal(const Window &window, cl::NDRange lws_hint = CLKernelLibrary::get().default_ndrange()) - { - _lws_hint = lws_hint; - IKernel::configure(window); - } - -public: - /** Constructor */ - ICLKernel() - : _kernel(nullptr), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0), _lws_hint() - { - } - /** Returns a reference to the OpenCL kernel of this object. - * - * @return A reference to the OpenCL kernel of this object. - */ - cl::Kernel &kernel() - { - return _kernel; - } - /** Add the passed 1D array's parameters to the object's kernel's arguments starting from the index idx. - * - * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set. - * @param[in] array Array to set as an argument of the object's kernel. - * @param[in] strides @ref Strides object containing stride of each dimension in bytes. - * @param[in] num_dimensions Number of dimensions of the @p array. - * @param[in] window Window the kernel will be executed on. - */ - template - void add_1D_array_argument(unsigned int &idx, const ICLArray *array, const Strides &strides, unsigned int num_dimensions, const Window &window) - { - add_array_argument(idx, array, strides, num_dimensions, window); - } - /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx. - * - * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. - * @param[in] tensor Tensor to set as an argument of the object's kernel. - * @param[in] window Window the kernel will be executed on. - */ - void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window) - { - add_tensor_argument<1>(idx, tensor, window); - } - /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx if the condition is true. - * - * @param[in] cond Condition to check - * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. - * @param[in] tensor Tensor to set as an argument of the object's kernel. - * @param[in] window Window the kernel will be executed on. - */ - void add_1D_tensor_argument_if(bool cond, unsigned int &idx, const ICLTensor *tensor, const Window &window) - { - if(cond) - { - add_1D_tensor_argument(idx, tensor, window); - } - } - /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx. - * - * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. - * @param[in] tensor Tensor to set as an argument of the object's kernel. 
- * @param[in] window Window the kernel will be executed on. - */ - void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window) - { - add_tensor_argument<2>(idx, tensor, window); - } - /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx if the condition is true. - * - * @param[in] cond Condition to check - * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. - * @param[in] tensor Tensor to set as an argument of the object's kernel. - * @param[in] window Window the kernel will be executed on. - */ - void add_2D_tensor_argument_if(bool cond, unsigned int &idx, const ICLTensor *tensor, const Window &window) - { - if(cond) - { - add_2D_tensor_argument(idx, tensor, window); - } - } - /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx. - * - * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. - * @param[in] tensor Tensor to set as an argument of the object's kernel. - * @param[in] window Window the kernel will be executed on. - */ - void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window) - { - add_tensor_argument<3>(idx, tensor, window); - } - /** Add the passed 4D tensor's parameters to the object's kernel's arguments starting from the index idx. - * - * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. - * @param[in] tensor Tensor to set as an argument of the object's kernel. - * @param[in] window Window the kernel will be executed on. - */ - void add_4D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window) - { - add_tensor_argument<4>(idx, tensor, window); - } - /** Returns the number of arguments enqueued per 1D array object. - * - * @return The number of arguments enqueues per 1D array object. - */ - constexpr static unsigned int num_arguments_per_1D_array() - { - return num_arguments_per_array<1>(); - } - /** Returns the number of arguments enqueued per 1D tensor object. - * - * @return The number of arguments enqueues per 1D tensor object. - */ - constexpr static unsigned int num_arguments_per_1D_tensor() - { - return num_arguments_per_tensor<1>(); - } - /** Returns the number of arguments enqueued per 2D tensor object. - * - * @return The number of arguments enqueues per 2D tensor object. - */ - constexpr static unsigned int num_arguments_per_2D_tensor() - { - return num_arguments_per_tensor<2>(); - } - /** Returns the number of arguments enqueued per 3D tensor object. - * - * @return The number of arguments enqueues per 3D tensor object. - */ - constexpr static unsigned int num_arguments_per_3D_tensor() - { - return num_arguments_per_tensor<3>(); - } - /** Returns the number of arguments enqueued per 4D tensor object. - * - * @return The number of arguments enqueues per 4D tensor object. - */ - constexpr static unsigned int num_arguments_per_4D_tensor() - { - return num_arguments_per_tensor<4>(); - } - /** Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue. - * - * @note The queue is *not* flushed by this method, and therefore the kernel will not have been executed by the time this method returns. - * - * @param[in] window Region on which to execute the kernel. 
(Must be a valid region of the window returned by window()). - * @param[in,out] queue Command queue on which to enqueue the kernel. - */ - virtual void run(const Window &window, cl::CommandQueue &queue) - { - ARM_COMPUTE_UNUSED(window, queue); - } - /** Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue. - * - * @note The queue is *not* flushed by this method, and therefore the kernel will not have been executed by the time this method returns. - * - * @param[in] tensors A vector containing the tensors to operato on. - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - * @param[in,out] queue Command queue on which to enqueue the kernel. - */ - virtual void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) - { - ARM_COMPUTE_UNUSED(tensors, window, queue); - } - /** Add the passed parameters to the object's kernel's arguments starting from the index idx. - * - * @param[in,out] idx Index at which to start adding the arguments. Will be incremented by the number of kernel arguments set. - * @param[in] value Value to set as an argument of the object's kernel. - */ - template - void add_argument(unsigned int &idx, T value) - { - _kernel.setArg(idx++, value); - } - - /** Set the Local-Workgroup-Size hint - * - * @note This method should be called after the configuration of the kernel - * - * @param[in] lws_hint Local-Workgroup-Size to use - */ - void set_lws_hint(const cl::NDRange &lws_hint) - { - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); // lws_hint will be overwritten by configure() - _lws_hint = lws_hint; - } - - /** Return the Local-Workgroup-Size hint - * - * @return Current lws hint - */ - cl::NDRange lws_hint() const - { - return _lws_hint; - } - - /** Get the configuration ID - * - * @note The configuration ID can be used by the caller to distinguish different calls of the same OpenCL kernel - * In particular, this method can be used by CLScheduler to keep track of the best LWS for each configuration of the same kernel. - * The configuration ID should be provided only for the kernels potentially affected by the LWS geometry - * - * @note This method should be called after the configuration of the kernel - * - * @return configuration id string - */ - const std::string &config_id() const - { - return _config_id; - } - - /** Set the targeted GPU architecture - * - * @param[in] target The targeted GPU architecture - */ - void set_target(GPUTarget target) - { - _target = target; - } - - /** Set the targeted GPU architecture according to the CL device - * - * @param[in] device A CL device - */ - void set_target(cl::Device &device); - - /** Get the targeted GPU architecture - * - * @return The targeted GPU architecture. - */ - GPUTarget get_target() const - { - return _target; - } - - /** Get the maximum workgroup size for the device the CLKernelLibrary uses. - * - * @return The maximum workgroup size value. - */ - size_t get_max_workgroup_size(); - /** Get the global work size given an execution window - * - * @param[in] window Execution window - * - * @return Global work size of the given execution window - */ - static cl::NDRange gws_from_window(const Window &window); - -private: - /** Add the passed array's parameters to the object's kernel's arguments starting from the index idx. - * - * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set. 
- * @param[in] array Array to set as an argument of the object's kernel. - * @param[in] strides @ref Strides object containing stride of each dimension in bytes. - * @param[in] num_dimensions Number of dimensions of the @p array. - * @param[in] window Window the kernel will be executed on. - */ - template - void add_array_argument(unsigned int &idx, const ICLArray *array, const Strides &strides, unsigned int num_dimensions, const Window &window); - /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx. - * - * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. - * @param[in] tensor Tensor to set as an argument of the object's kernel. - * @param[in] window Window the kernel will be executed on. - */ - template - void add_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window); - -protected: - cl::Kernel _kernel; /**< OpenCL kernel to run */ - GPUTarget _target; /**< The targeted GPU */ - std::string _config_id; /**< Configuration ID */ - size_t _max_workgroup_size; /**< The maximum workgroup size for this kernel */ -private: - cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */ -}; - -/** Add the kernel to the command queue with the given window. - * - * @note Depending on the size of the window, this might translate into several jobs being enqueued. - * - * @note If kernel->kernel() is empty then the function will return without adding anything to the queue. - * - * @param[in,out] queue OpenCL command queue. - * @param[in] kernel Kernel to enqueue - * @param[in] window Window the kernel has to process. - * @param[in] lws_hint (Optional) Local workgroup size requested. Default is based on the device target. - * @param[in] use_dummy_work_items (Optional) Use dummy work items in order to have two dimensional power of two NDRange. Default is false - * Note: it is kernel responsibility to check if the work-item is out-of-range - * - * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed. - */ -void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items = false); - -/** Add the passed array's parameters to the object's kernel's arguments starting from the index idx. - * - * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set. - * @param[in] array Array to set as an argument of the object's kernel. - * @param[in] strides @ref Strides object containing stride of each dimension in bytes. - * @param[in] num_dimensions Number of dimensions of the @p array. - * @param[in] window Window the kernel will be executed on. 
- */
-template <typename T, unsigned int dimension_size>
-void ICLKernel::add_array_argument(unsigned &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON(array == nullptr);
-
-    // Calculate offset to the start of the window
-    unsigned int offset_first_element = 0;
-
-    for(unsigned int n = 0; n < num_dimensions; ++n)
-    {
-        offset_first_element += window[n].start() * strides[n];
-    }
-
-    unsigned int idx_start = idx;
-    _kernel.setArg(idx++, array->cl_buffer());
-
-    for(unsigned int dimension = 0; dimension < dimension_size; dimension++)
-    {
-        _kernel.setArg(idx++, strides[dimension]);
-        _kernel.setArg(idx++, strides[dimension] * window[dimension].step());
-    }
-
-    _kernel.setArg(idx++, offset_first_element);
-
-    ARM_COMPUTE_ERROR_ON_MSG_VAR(idx_start + num_arguments_per_array<dimension_size>() != idx,
-                                 "add_%dD_array_argument() is supposed to add exactly %d arguments to the kernel", dimension_size, num_arguments_per_array<dimension_size>());
-    ARM_COMPUTE_UNUSED(idx_start);
-}
-}
-#endif /*ARM_COMPUTE_ICLKERNEL_H */
diff --git a/arm_compute/core/CL/ICLSimple2DKernel.h b/arm_compute/core/CL/ICLSimple2DKernel.h
deleted file mode 100644
index 86561cd562..0000000000
--- a/arm_compute/core/CL/ICLSimple2DKernel.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICLSIMPLE2DKERNEL_H
-#define ARM_COMPUTE_ICLSIMPLE2DKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimpleKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output. This interface can be used when the work-item processes a 2D tile */
-class ICLSimple2DKernel : public ICLSimpleKernel
-{
-public:
-    // Inherited methods overridden:
-    void run(const Window &window, cl::CommandQueue &queue) override;
-};
-}
-#endif /*ARM_COMPUTE_ICLSIMPLE2DKERNEL_H */
diff --git a/arm_compute/core/CL/ICLSimple3DKernel.h b/arm_compute/core/CL/ICLSimple3DKernel.h
deleted file mode 100644
index 3b4eaf7350..0000000000
--- a/arm_compute/core/CL/ICLSimple3DKernel.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2017-2019 Arm Limited.
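Note on the ICLKernel interface removed above: the argument helpers (add_*_tensor_argument(), num_arguments_per_*_tensor()) and the free enqueue() function are driven from a concrete kernel's run() override. The following sketch is not part of this patch; the kernel class name and the _input/_output members are illustrative assumptions showing the typical slice-by-slice pattern.

    void CLExampleKernel::run(const Window &window, cl::CommandQueue &queue)
    {
        ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
        ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);

        Window slice = window.first_slice_window_3D();
        do
        {
            unsigned int idx = 0;                       // running argument index, advanced by each add_*() call
            add_3D_tensor_argument(idx, _input, slice); // binds buffer, strides, steps and first-element offset
            add_3D_tensor_argument(idx, _output, slice);
            enqueue(queue, *this, slice, lws_hint());   // one enqueue per slice; the queue is not flushed here
        }
        while(window.slide_window_slice_3D(slice));
    }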
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_ICLSIMPLE3DKERNEL_H -#define ARM_COMPUTE_ICLSIMPLE3DKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output. - * Both input tensor and output tensor must have at least 3 dimensions. - */ -class ICLSimple3DKernel : public ICLSimple2DKernel -{ -public: - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; -}; -} -#endif /*ARM_COMPUTE_ICLSIMPLE3DKERNEL_H */ diff --git a/arm_compute/core/CL/ICLSimpleKernel.h b/arm_compute/core/CL/ICLSimpleKernel.h deleted file mode 100644 index 805342f830..0000000000 --- a/arm_compute/core/CL/ICLSimpleKernel.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_ICLSIMPLEKERNEL_H -#define ARM_COMPUTE_ICLSIMPLEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Helpers.h" - -namespace arm_compute -{ -/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output */ -class ICLSimpleKernel : public ICLKernel -{ -public: - /** Constructor. 
*/ - ICLSimpleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICLSimpleKernel(const ICLSimpleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICLSimpleKernel &operator=(const ICLSimpleKernel &) = delete; - /** Allow instances of this class to be moved */ - ICLSimpleKernel(ICLSimpleKernel &&) = default; - /** Allow instances of this class to be moved */ - ICLSimpleKernel &operator=(ICLSimpleKernel &&) = default; - /** Default destructor */ - ~ICLSimpleKernel() = default; - - /** Configure the kernel - * - * @param[in] input Source tensor. - * @param[out] output Destination tensor. - * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration. - * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. - * @param[in] border_size (Optional) Size of the border. - */ - void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize()); - -protected: - const ICLTensor *_input; - ICLTensor *_output; -}; -} - -#endif /*ARM_COMPUTE_ICLSIMPLEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h b/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h deleted file mode 100644 index f62855cbb9..0000000000 --- a/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H -#define ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the absolute difference kernel. - * - * Absolute difference is computed by: - * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f] - */ -class CLAbsoluteDifferenceKernel : public ICLKernel -{ -public: - /** Default constructor. 
*/ - CLAbsoluteDifferenceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLAbsoluteDifferenceKernel(const CLAbsoluteDifferenceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLAbsoluteDifferenceKernel &operator=(const CLAbsoluteDifferenceKernel &) = delete; - /** Allow instances of this class to be moved */ - CLAbsoluteDifferenceKernel(CLAbsoluteDifferenceKernel &&) = default; - /** Allow instances of this class to be moved */ - CLAbsoluteDifferenceKernel &operator=(CLAbsoluteDifferenceKernel &&) = default; - /** Default destructor */ - ~CLAbsoluteDifferenceKernel() = default; - - /** Set the inputs and output images. - * - * @param[in] input1 Source tensor. Data types supported: U8/S16. - * @param[in] input2 Source tensor. Data types supported: U8/S16. - * @param[out] output Destination tensor. Data types supported: U8/S16. - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - /** Set the inputs and output images. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 Source tensor. Data types supported: U8/S16. - * @param[in] input2 Source tensor. Data types supported: U8/S16. - * @param[out] output Destination tensor. Data types supported: U8/S16. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input1; /**< Source tensor 1. */ - const ICLTensor *_input2; /**< Source tensor 2. */ - ICLTensor *_output; /**< Destination tensor. */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLAccumulateKernel.h b/arm_compute/core/CL/kernels/CLAccumulateKernel.h deleted file mode 100644 index e067da084f..0000000000 --- a/arm_compute/core/CL/kernels/CLAccumulateKernel.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLACCUMULATEKERNEL_H -#define ARM_COMPUTE_CLACCUMULATEKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -#include - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the accumulate kernel. 
- * - * Accumulation is computed by: - * @f[ accum(x,y) = accum(x,y) + input(x,y) @f] - */ -class CLAccumulateKernel : public ICLSimple2DKernel -{ -public: - /** Set the input and accumulation tensors. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] accum Destination tensor. Data types supported: S16. - */ - void configure(const ICLTensor *input, ICLTensor *accum); - /** Set the input and accumulation tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] accum Destination tensor. Data types supported: S16. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *accum); -}; - -/** Interface for the accumulate weighted kernel. - * - * Weighted accumulation is computed: - * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f] - * - * Where @f$ 0 \le \alpha \le 1 @f$ - * Conceptually, the rounding for this is defined as: - * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f] -*/ -class CLAccumulateWeightedKernel : public ICLSimple2DKernel -{ -public: - /** Set the input and accumulation images, and the scale value. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32. - * @param[in,out] accum Accumulated tensor. Data types supported: U8. - */ - void configure(const ICLTensor *input, float alpha, ICLTensor *accum); - /** Set the input and accumulation images, and the scale value. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32. - * @param[in,out] accum Accumulated tensor. Data types supported: U8. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, float alpha, ICLTensor *accum); -}; - -/** Interface for the accumulate squared kernel. - * - * The accumulation of squares is computed: - * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f] - * - * Where @f$ 0 \le shift \le 15 @f$ -*/ -class CLAccumulateSquaredKernel : public ICLSimple2DKernel -{ -public: - /** Set the input and accumulation tensors and the shift value. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32. - * @param[in,out] accum Accumulated tensor. Data types supported: S16. - */ - void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum); - /** Set the input and accumulation tensors and the shift value. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32. - * @param[in,out] accum Accumulated tensor. Data types supported: S16. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, uint32_t shift, ICLTensor *accum); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLACCUMULATEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h deleted file mode 100644 index 81d4ccb065..0000000000 --- a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H -#define ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; -/** Interface for the activation layer kernel. */ -class CLActivationLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLActivationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLActivationLayerKernel(const CLActivationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLActivationLayerKernel &operator=(const CLActivationLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLActivationLayerKernel(CLActivationLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLActivationLayerKernel &operator=(CLActivationLayerKernel &&) = default; - /** Default destructor */ - ~CLActivationLayerKernel() = default; - /** Set the input and output tensor. - * - * @note If the output tensor is a nullptr, the activation function will be performed in-place - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result - * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] act_info Activation layer information. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo act_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayerKernel - * - * @param[in] input Source tensor info. 
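Reading aid for the accumulation formulas documented in CLAccumulateWeightedKernel and CLAccumulateSquaredKernel above: a possible scalar transcription of the per-pixel rounding and saturation, written as plain C++ and not part of this patch.

    #include <algorithm>
    #include <cstdint>

    // Weighted accumulation: accum = uint8((1 - alpha) * float32(int32(accum)) + alpha * float32(int32(input)))
    inline uint8_t accumulate_weighted_ref(uint8_t accum, uint8_t input, float alpha)
    {
        const float acc = static_cast<float>(static_cast<int32_t>(accum));
        const float in  = static_cast<float>(static_cast<int32_t>(input));
        return static_cast<uint8_t>((1.f - alpha) * acc + alpha * in); // alpha is expected to be in [0, 1]
    }

    // Squared accumulation: accum = saturate_int16(uint16(accum) + ((uint16(input^2)) >> shift))
    inline int16_t accumulate_squared_ref(int16_t accum, uint8_t input, uint32_t shift)
    {
        const uint32_t sum = static_cast<uint16_t>(accum) + ((static_cast<uint32_t>(input) * input) >> shift); // shift is expected to be in [0, 15]
        return static_cast<int16_t>(std::min<uint32_t>(sum, INT16_MAX));
    }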
In case of @p output tensor info = nullptr, this tensor will store the result - * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input - * @param[in] act_info Activation layer information. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; - -private: - bool _run_in_place; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h b/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h deleted file mode 100644 index 48876c0b56..0000000000 --- a/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H -#define ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the reduction operation kernel - * - * @note The default data type for an uninitialized output tensor is - * signed 32-bit integer (S32). It is the user's responsibility to check - * that the results do not overflow because the indices are computed - * in unsigned 32-bit (U32). - */ -class CLArgMinMaxLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLArgMinMaxLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLArgMinMaxLayerKernel(const CLArgMinMaxLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLArgMinMaxLayerKernel &operator=(const CLArgMinMaxLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLArgMinMaxLayerKernel(CLArgMinMaxLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLArgMinMaxLayerKernel &operator=(CLArgMinMaxLayerKernel &&) = default; - /** Default destructor */ - ~CLArgMinMaxLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. 
Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. - * @param[in] prev_output Destination tensor of the previous iterations of @ref CLArgMinMaxLayerKernel. Data types supported: U32/S32 - * Has to be nullptr for the first iteration - * @param[out] output Destination tensor. Data types supported: U32/S32 - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 - * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported. - */ - void configure(const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. - * @param[in] prev_output Destination tensor of the previous iterations of @ref CLArgMinMaxLayerKernel. Data types supported: U32/S32 - * Has to be nullptr for the first iteration - * @param[out] output Destination tensor. Data types supported: U32/S32 - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 - * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op); - - /** Static function to check if given info will lead to a valid configuration of @ref CLArgMinMaxLayerKernel. - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. - * @param[in] prev_output Destination tensor info of the previous iterations. Data types supported: U32/S32 - * Has to be nullptr for the first iteration - * @param[in] output Destination tensor info. Data types supported: U32/S32 - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 - * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *prev_output, const ITensorInfo *output, unsigned int axis, ReductionOperation op); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_prev_output; - ICLTensor *_output; - unsigned int _reduction_axis; - ReductionOperation _op; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h deleted file mode 100644 index bb8968ca83..0000000000 --- a/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. 
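For the CLArgMinMaxLayerKernel interface documented above, the usual flow is to validate on tensor infos first and then configure the allocated tensors. A minimal sketch follows (not part of this patch); the input/output tensors, the axis value and the choice of ArgMax are assumptions for illustration.

    // Assumed: 'input' and 'output' are ICLTensor pointers whose info() has already been initialised.
    CLArgMinMaxLayerKernel kernel;
    const unsigned int axis = 0; // reduce along the innermost dimension

    // validate() only needs tensor infos, so it can run before any OpenCL memory is allocated.
    ARM_COMPUTE_ERROR_THROW_ON(CLArgMinMaxLayerKernel::validate(input->info(), nullptr, output->info(), axis, ReductionOperation::ARG_IDX_MAX));

    // First (and only) pass: prev_output must be nullptr, as the parameter documentation requires.
    kernel.configure(input, nullptr, output, axis, ReductionOperation::ARG_IDX_MAX);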
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H -#define ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the batch concatenate kernel. - * The input tensor will be concatenated into the output tensor. - */ -class CLBatchConcatenateLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLBatchConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBatchConcatenateLayerKernel(const CLBatchConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBatchConcatenateLayerKernel &operator=(const CLBatchConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLBatchConcatenateLayerKernel(CLBatchConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLBatchConcatenateLayerKernel &operator=(CLBatchConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~CLBatchConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[in] batch_offset The offset on axis # 3. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - * @note: The output tensor's low two dimensions can't be smaller than the input one's. - * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. - * - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLBatchConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] batch_offset The offset on axis # 3. - * @param[in] output Output tensor info. Data types supported: Same as @p input. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; - -private: - unsigned int _batch_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h deleted file mode 100644 index c556a0c6f4..0000000000 --- a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the BatchNormalization layer kernel. - */ -class CLBatchNormalizationLayerKernel : public ICLKernel -{ -public: - /** Constructor */ - CLBatchNormalizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBatchNormalizationLayerKernel(const CLBatchNormalizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBatchNormalizationLayerKernel &operator=(const CLBatchNormalizationLayerKernel &) = delete; - /** Default Move Constructor. */ - CLBatchNormalizationLayerKernel(CLBatchNormalizationLayerKernel &&) = default; - /** Default move assignment operator */ - CLBatchNormalizationLayerKernel &operator=(CLBatchNormalizationLayerKernel &&) = default; - /** Default destructor */ - ~CLBatchNormalizationLayerKernel() = default; - - /** Set the input and output tensors. - * - * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place - * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. - * 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. 
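As a concrete aside on the batch_offset parameter: successive inputs are written into the same destination at increasing batch offsets, and a candidate configuration can be checked up front through the static validate() helper without creating any OpenCL resources. A minimal sketch, assuming the public header location prior to this move (the patch relocates the kernels under src/) and made-up 4D shapes of [width, height, channels, batches]:

#include "arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"

using namespace arm_compute;

int main()
{
    // An 8-batch input appended at batch_offset 0 into a 16-batch destination.
    const TensorInfo src(TensorShape(32U, 32U, 3U, 8U), 1, DataType::F32);
    const TensorInfo dst(TensorShape(32U, 32U, 3U, 16U), 1, DataType::F32);

    const Status s = CLBatchConcatenateLayerKernel::validate(&src, 0 /* batch_offset */, &dst);
    return (s.error_code() == ErrorCode::OK) ? 0 : 1;
}

A second 8-batch input would then be validated against the same destination with batch_offset = 8.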
Data type supported: same as @p input - * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input - * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input - * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. - */ - void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, const ICLTensor *gamma = nullptr, float epsilon = 0.001f, - ActivationLayerInfo act_info = ActivationLayerInfo()); - /** Set the input and output tensors. - * - * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. - * 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input - * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input - * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input - * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. - */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, - const ICLTensor *gamma = nullptr, float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref CLBatchNormalizationLayerKernel - * - * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result. - * 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. 
- * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input - * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input - * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input - * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *mean, const ITensorInfo *var, - const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr, - float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_input; - ICLTensor *_output; - const ICLTensor *_mean; - const ICLTensor *_var; - const ICLTensor *_beta; - const ICLTensor *_gamma; - float _epsilon; - bool _run_in_place; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h deleted file mode 100644 index 7af88d8986..0000000000 --- a/arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
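Per feature map, the kernel computes out = gamma * (in - mean) / sqrt(var + epsilon) + beta, optionally followed by the fused activation. A minimal NCHW F32 configuration sketch, assuming the pre-move include path and the usual CLTensor/CLScheduler runtime helpers; shapes are illustrative only:

#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    // Source/destination are [width, height, FM, batches]; mean/var/beta/gamma are 1-D with size FM.
    CLTensor src, dst, mean, var, beta, gamma;
    src.allocator()->init(TensorInfo(TensorShape(16U, 16U, 8U, 2U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U, 8U, 2U), 1, DataType::F32));
    for(CLTensor *t : { &mean, &var, &beta, &gamma })
    {
        t->allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));
    }

    CLBatchNormalizationLayerKernel bn;
    bn.configure(&src, &dst, &mean, &var, &beta, &gamma, 0.001f,
                 ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    for(CLTensor *t : { &src, &dst, &mean, &var, &beta, &gamma })
    {
        t->allocator()->allocate();
    }
    CLScheduler::get().enqueue(bn); // run over the kernel's full execution window
    return 0;
}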
- */ -#ifndef ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H -#define ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the batch to space kernel */ -class CLBatchToSpaceLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLBatchToSpaceLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBatchToSpaceLayerKernel(const CLBatchToSpaceLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBatchToSpaceLayerKernel &operator=(const CLBatchToSpaceLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLBatchToSpaceLayerKernel(CLBatchToSpaceLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLBatchToSpaceLayerKernel &operator=(CLBatchToSpaceLayerKernel &&) = default; - /** Default destructor */ - ~CLBatchToSpaceLayerKernel() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output); - /** Initialise the kernel's inputs and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output); - /** Initialise the kernel's inputs and output (Static block shape). - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output); - /** Initialise the kernel's inputs and output (Static block shape). - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayerKernel - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[in] output Tensor output. 
Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayerKernel (Static block shape). - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[in] output Tensor output. Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const int32_t block_shape_x, const int32_t block_shape_y, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - const ICLTensor *_block_shape; /**< Block shape tensor */ - ICLTensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h b/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h deleted file mode 100644 index e291f08b9a..0000000000 --- a/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLBITWISEANDKERNEL_H -#define ARM_COMPUTE_CLBITWISEANDKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the bitwise AND operation kernel. - * - * Result is computed by: - * @f[ output(x,y) = input1(x,y) \land input2(x,y) @f] - */ -class CLBitwiseAndKernel : public ICLKernel -{ -public: - /** Default constructor. 
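Of the two batch-to-space variants, the static block-shape overload is the simpler one to wire up. A sketch under the same include convention as the earlier examples, with an NCHW F32 rank-4 input and a 2x2 block, so the 8 input batches fold back into 2 output batches:

#include "arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    CLTensor src, dst;
    // 4x4 spatial, 1 channel, 8 batches -> 8x8 spatial, 1 channel, 2 batches after the 2x2 block reshuffle.
    src.allocator()->init(TensorInfo(TensorShape(4U, 4U, 1U, 8U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(8U, 8U, 1U, 2U), 1, DataType::F32));

    CLBatchToSpaceLayerKernel b2s;
    b2s.configure(&src, 2 /* block_shape_x */, 2 /* block_shape_y */, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    CLScheduler::get().enqueue(b2s);
    return 0;
}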
*/ - CLBitwiseAndKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBitwiseAndKernel(const CLBitwiseAndKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBitwiseAndKernel &operator=(const CLBitwiseAndKernel &) = delete; - /** Allow instances of this class to be moved */ - CLBitwiseAndKernel(CLBitwiseAndKernel &&) = default; - /** Allow instances of this class to be moved */ - CLBitwiseAndKernel &operator=(CLBitwiseAndKernel &&) = default; - /** Set the inputs and output images - * - * @param[in] input1 Source tensor. Data types supported: U8. - * @param[in] input2 Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U8. - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - /** Set the inputs and output images - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 Source tensor. Data types supported: U8. - * @param[in] input2 Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U8. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input1; /**< Source tensor 1 */ - const ICLTensor *_input2; /**< Source tensor 2 */ - ICLTensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLBITWISEANDKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h b/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h deleted file mode 100644 index f57bbf4778..0000000000 --- a/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLBITWISENOTKERNEL_H -#define ARM_COMPUTE_CLBITWISENOTKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the bitwise NOT operation kernel. - * - * Result is computed by: - * @f[ output(x,y) = \lnot input(x,y) @f] - */ -class CLBitwiseNotKernel : public ICLSimple2DKernel -{ -public: - /** Set the inputs and output images. 
- * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U8. - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Set the inputs and output images. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U8. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLBITWISENOTKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h b/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h deleted file mode 100644 index 944224ecb9..0000000000 --- a/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLBITWISEORKERNEL_H -#define ARM_COMPUTE_CLBITWISEORKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the bitwise OR operation kernel. - * - * Result is computed by: - * @f[ output(x,y) = input1(x,y) \lor input2(x,y) @f] - */ -class CLBitwiseOrKernel : public ICLKernel -{ -public: - /** Default constructor. */ - CLBitwiseOrKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBitwiseOrKernel(const CLBitwiseOrKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBitwiseOrKernel &operator=(const CLBitwiseOrKernel &) = delete; - /** Allow instances of this class to be moved */ - CLBitwiseOrKernel(CLBitwiseOrKernel &&) = default; - /** Allow instances of this class to be moved */ - CLBitwiseOrKernel &operator=(CLBitwiseOrKernel &&) = default; - /** Set the inputs and output images - * - * @param[in] input1 Source tensor. Data types supported: U8. - * @param[in] input2 Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U8. - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - /** Set the inputs and output images - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 Source tensor. Data types supported: U8. - * @param[in] input2 Source tensor. 
Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U8. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input1; /**< Source tensor 1 */ - const ICLTensor *_input2; /**< Source tensor 2 */ - ICLTensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLBITWISEORKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h b/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h deleted file mode 100644 index 732ae8659e..0000000000 --- a/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLBITWISEXORKERNEL_H -#define ARM_COMPUTE_CLBITWISEXORKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the bitwise XOR operation kernel. - * - * Result is computed by: - * @f[ output(x,y) = input1(x,y) \oplus input2(x,y) @f] - */ -class CLBitwiseXorKernel : public ICLKernel -{ -public: - /** Default constructor. */ - CLBitwiseXorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBitwiseXorKernel(const CLBitwiseXorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBitwiseXorKernel &operator=(const CLBitwiseXorKernel &) = delete; - /** Allow instances of this class to be moved */ - CLBitwiseXorKernel(CLBitwiseXorKernel &&) = default; - /** Allow instances of this class to be moved */ - CLBitwiseXorKernel &operator=(CLBitwiseXorKernel &&) = default; - /** Set the inputs and output images - * - * @param[in] input1 Source tensor. Data types supported: U8. - * @param[in] input2 Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U8. - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - /** Set the inputs and output images - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 Source tensor. Data types supported: U8. - * @param[in] input2 Source tensor. Data types supported: U8. 
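The bitwise AND, OR, XOR and NOT kernels all share the same U8-only configure pattern; a minimal sketch for the AND case, with the pre-move header path and an arbitrary 64x64 image size:

#include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    CLTensor a, b, out;
    for(CLTensor *t : { &a, &b, &out })
    {
        t->allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));
    }

    CLBitwiseAndKernel band;
    band.configure(&a, &b, &out); // out(x, y) = a(x, y) & b(x, y)

    for(CLTensor *t : { &a, &b, &out })
    {
        t->allocator()->allocate();
    }
    CLScheduler::get().enqueue(band);
    return 0;
}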
- * @param[out] output Destination tensor. Data types supported: U8. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input1; /**< Source tensor 1 */ - const ICLTensor *_input2; /**< Source tensor 2 */ - ICLTensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLBITWISEXORKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h b/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h deleted file mode 100644 index 4e8c5a6f18..0000000000 --- a/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H -#define ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the bounding box kernel */ -class CLBoundingBoxTransformKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLBoundingBoxTransformKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBoundingBoxTransformKernel(const CLBoundingBoxTransformKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBoundingBoxTransformKernel &operator=(const CLBoundingBoxTransformKernel &) = delete; - /** Allow instances of this class to be moved */ - CLBoundingBoxTransformKernel(CLBoundingBoxTransformKernel &&) = default; - /** Allow instances of this class to be moved */ - CLBoundingBoxTransformKernel &operator=(CLBoundingBoxTransformKernel &&) = default; - /** Default destructor */ - ~CLBoundingBoxTransformKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. - * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input - * @param[in] deltas Bounding box translations and scales. 
Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. - * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input - * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. - * - * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. - * - */ - void configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. - * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input - * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. - * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input - * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. - * - * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. - * - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref CLBoundingBoxTransform - * - * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. - * @param[in] pred_boxes Destination tensor info. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input - * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. - * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input - * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. - * - * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. - * - * @return a Status - */ - static Status validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_boxes; - ICLTensor *_pred_boxes; - const ICLTensor *_deltas; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLBox3x3Kernel.h b/arm_compute/core/CL/kernels/CLBox3x3Kernel.h deleted file mode 100644 index 1a8572dd68..0000000000 --- a/arm_compute/core/CL/kernels/CLBox3x3Kernel.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
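Because only single-image prediction is supported, the image width, height and scale travel in the BoundingBoxTransformInfo argument rather than in a tensor. A sketch with M = 64 proposals and K = 1 class, assuming the pre-move header path, that BoundingBoxTransformInfo is constructible from (img_width, img_height, scale), and that the four box coordinates sit on tensor dimension 0; those last two points are assumptions of this sketch:

#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    constexpr unsigned int M = 64U; // number of proposals (made up)
    constexpr unsigned int K = 1U;  // number of classes (made up)

    CLTensor boxes, pred_boxes, deltas;
    boxes.allocator()->init(TensorInfo(TensorShape(4U, M), 1, DataType::F32));
    deltas.allocator()->init(TensorInfo(TensorShape(4U * K, M), 1, DataType::F32));
    pred_boxes.allocator()->init(TensorInfo(TensorShape(4U * K, M), 1, DataType::F32));

    // Image width, height and scale carried by the info object (assumed constructor form).
    const BoundingBoxTransformInfo bb_info(640.f, 480.f, 1.f);

    CLBoundingBoxTransformKernel bbt;
    bbt.configure(&boxes, &pred_boxes, &deltas, bb_info);

    for(CLTensor *t : { &boxes, &pred_boxes, &deltas })
    {
        t->allocator()->allocate();
    }
    CLScheduler::get().enqueue(bbt);
    return 0;
}

Keeping the image geometry in an info struct rather than a tensor keeps the kernel arguments constant per dispatch, which is why the interface is shaped this way.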
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLBOX3X3KERNEL_H -#define ARM_COMPUTE_CLBOX3X3KERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the box 3x3 filter kernel. - * - */ -class CLBox3x3Kernel : public ICLSimple2DKernel -{ -public: - /**Initialise the kernel's input and output. - * - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /**Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - - //Inherited methods overriden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLBOX3X3KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h deleted file mode 100644 index c4d0297aef..0000000000 --- a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
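The box 3x3 kernel is a plain ICLSimple2DKernel, so configuration reduces to the source, the destination and the border flag. A minimal U8 sketch with the pre-move header path; with border_undefined = true the one-pixel border is simply left unfiltered:

#include "arm_compute/core/CL/kernels/CLBox3x3Kernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(128U, 128U), 1, DataType::U8));
    dst.allocator()->init(TensorInfo(TensorShape(128U, 128U), 1, DataType::U8));

    CLBox3x3Kernel box;
    box.configure(&src, &dst, true /* border_undefined: skip the 1-pixel border */);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    CLScheduler::get().enqueue(box);
    return 0;
}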
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCANNYEDGEKERNEL_H -#define ARM_COMPUTE_CLCANNYEDGEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform Gradient computation. - */ -class CLGradientKernel : public ICLKernel -{ -public: - /** Constructor */ - CLGradientKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGradientKernel(const CLGradientKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGradientKernel &operator=(const CLGradientKernel &) = delete; - /** Initialise the kernel's sources, destinations and border mode. - * - * @note gx, gy and mag must all be the same size (either 16 or 32). - * - * @param[in] gx Source tensor - Gx component. Data types supported: S16/S32. - * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx. - * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy. - * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8. - * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm. - */ - void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type); - /** Initialise the kernel's sources, destinations and border mode. - * - * @note gx, gy and mag must all be the same size (either 16 or 32). - * - * @param[in] compile_context The compile context to be used. - * @param[in] gx Source tensor - Gx component. Data types supported: S16/S32. - * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx. - * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy. - * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8. - * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_gx; /**< Source tensor - Gx component */ - const ICLTensor *_gy; /**< Source tensor - Gy component */ - ICLTensor *_magnitude; /**< Destination tensor - Magnitude */ - ICLTensor *_phase; /**< Destination tensor - Quantized phase */ -}; - -/** OpenCL kernel to perform Non-Maxima suppression for Canny Edge. - * - * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input - * to characterize points as possible edges. The output buffer needs to be cleared before this kernel is executed. 
- * - * @note Hysteresis is computed in @ref CLEdgeTraceKernel - */ -class CLEdgeNonMaxSuppressionKernel : public ICLKernel -{ -public: - /** Constructor */ - CLEdgeNonMaxSuppressionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLEdgeNonMaxSuppressionKernel(const CLEdgeNonMaxSuppressionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLEdgeNonMaxSuppressionKernel &operator=(const CLEdgeNonMaxSuppressionKernel &) = delete; - /** Initialise the kernel's sources, destination and border mode. - * - * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32. - * @param[in] phase Source tensor - Quantized phase. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U16/U32. - * @param[in] lower_thr Lower threshold. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined); - /** Initialise the kernel's sources, destination and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32. - * @param[in] phase Source tensor - Quantized phase. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U16/U32. - * @param[in] lower_thr Lower threshold. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_magnitude; /**< Source tensor - Magnitude. */ - const ICLTensor *_phase; /**< Source tensor - Quantized phase. */ - ICLTensor *_output; /**< Destination tensor. */ -}; - -/** OpenCL kernel to perform Edge tracing. - */ -class CLEdgeTraceKernel : public ICLKernel -{ -public: - /** Constructor */ - CLEdgeTraceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLEdgeTraceKernel(const CLEdgeTraceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLEdgeTraceKernel &operator=(const CLEdgeTraceKernel &) = delete; - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data types supported: U16/U32. - * @param[out] output Destination tensor. Data types supported: U8. - * @param[in] upper_thr Upper threshold used for the hysteresis - * @param[in] lower_thr Lower threshold used for the hysteresis - * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32. - * Expected to be initialized to 0 before each run. - * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32 - * Expected to be initialized to 0 before each run. - * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32. - * Expected to be initialized to 0 before each run. - * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. 
Data types supported: U8. - * Expected to be initialized to 0 before each run. - */ - void configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, - ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter); - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U16/U32. - * @param[out] output Destination tensor. Data types supported: U8. - * @param[in] upper_thr Upper threshold used for the hysteresis - * @param[in] lower_thr Lower threshold used for the hysteresis - * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32. - * Expected to be initialized to 0 before each run. - * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32 - * Expected to be initialized to 0 before each run. - * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32. - * Expected to be initialized to 0 before each run. - * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8. - * Expected to be initialized to 0 before each run. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, - ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor. */ - ICLTensor *_output; /**< Destination tensor. */ - int32_t _lower_thr; /**< Lower threshold used for the hysteresis. */ - int32_t _upper_thr; /**< Upper threshold used for the hysteresis. */ - ICLTensor *_visited; /**< Marks visited elements */ - ICLTensor *_recorded; /**< Marks recorded elements */ - ICLTensor *_l1_stack; /**< L1 hysteris stack */ - ICLTensor *_l1_stack_counter; /**< L1 hysteris stack counter */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLCANNYEDGEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLChannelCombineKernel.h b/arm_compute/core/CL/kernels/CLChannelCombineKernel.h deleted file mode 100644 index f9c33df7c1..0000000000 --- a/arm_compute/core/CL/kernels/CLChannelCombineKernel.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
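Taken together, the three kernels form the Canny pipeline: CLGradientKernel converts the Sobel responses into magnitude and quantized phase, CLEdgeNonMaxSuppressionKernel thins candidate edges against the lower threshold, and CLEdgeTraceKernel applies the hysteresis between the two thresholds using the auxiliary visited/recorded/stack tensors. A sketch of the first stage only, assuming S16 Sobel outputs produced upstream and the pre-move header path:

#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    // Gx/Gy would normally come from a Sobel kernel; here they are just declared as 16-bit inputs,
    // so magnitude must also be 16-bit to match the pixel size.
    CLTensor gx, gy, magnitude, phase;
    gx.allocator()->init(TensorInfo(TensorShape(128U, 128U), 1, DataType::S16));
    gy.allocator()->init(TensorInfo(TensorShape(128U, 128U), 1, DataType::S16));
    magnitude.allocator()->init(TensorInfo(TensorShape(128U, 128U), 1, DataType::U16));
    phase.allocator()->init(TensorInfo(TensorShape(128U, 128U), 1, DataType::U8));

    CLGradientKernel gradient;
    gradient.configure(&gx, &gy, &magnitude, &phase, 1 /* norm_type: 1 = L1-Norm */);

    for(CLTensor *t : { &gx, &gy, &magnitude, &phase })
    {
        t->allocator()->allocate();
    }
    CLScheduler::get().enqueue(gradient);
    // Next stages: CLEdgeNonMaxSuppressionKernel on (magnitude, phase), then CLEdgeTraceKernel for hysteresis.
    return 0;
}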
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H -#define ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include -#include - -namespace arm_compute -{ -class ICLMultiImage; -class ICLTensor; -using ICLImage = ICLTensor; - -/** Interface for the channel combine kernel */ -class CLChannelCombineKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLChannelCombineKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLChannelCombineKernel(const CLChannelCombineKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLChannelCombineKernel &operator=(const CLChannelCombineKernel &) = delete; - /** Allow instances of this class to be moved */ - CLChannelCombineKernel(CLChannelCombineKernel &&) = default; - /** Allow instances of this class to be moved */ - CLChannelCombineKernel &operator=(CLChannelCombineKernel &&) = default; - /** Default destructor */ - ~CLChannelCombineKernel() = default; - /** Configure function's inputs and outputs. - * - * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. - * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. - * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. - * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format. - * @param[out] output The single planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422. - */ - void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output); - /** Configure function's inputs and outputs. - * - * @param[in] compile_context The compile context to be used. - * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. - * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. - * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. - * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format. - * @param[out] output The single planar output tensor. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output); - /** Configure function's inputs and outputs. - * - * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. - * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. - * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. - * @param[out] output The multi planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422. - */ - void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output); - /** Configure function's inputs and outputs. - * - * @param[in] compile_context The compile context to be used. - * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. - * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. - * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. - * @param[out] output The multi planar output tensor. 
Supported formats: RGB888/RGBA8888/YUYV422/UYVY422. - */ - void configure(const CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - std::array _planes; - ICLTensor *_output; - ICLMultiImage *_output_multi; - std::array _x_subsampling; - std::array _y_subsampling; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLChannelExtractKernel.h b/arm_compute/core/CL/kernels/CLChannelExtractKernel.h deleted file mode 100644 index 1ccf38bb8c..0000000000 --- a/arm_compute/core/CL/kernels/CLChannelExtractKernel.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H -#define ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -#include - -namespace arm_compute -{ -class ICLMultiImage; -class ICLTensor; -using ICLImage = ICLTensor; - -/** Interface for the channel extract kernel */ -class CLChannelExtractKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLChannelExtractKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLChannelExtractKernel(const CLChannelExtractKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLChannelExtractKernel &operator=(const CLChannelExtractKernel &) = delete; - /** Allow instances of this class to be moved */ - CLChannelExtractKernel(CLChannelExtractKernel &&) = default; - /** Allow instances of this class to be moved */ - CLChannelExtractKernel &operator=(CLChannelExtractKernel &&) = default; - /** Default destructor */ - ~CLChannelExtractKernel() = default; - /** Set the input and output of the kernel - * - * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 - * @param[in] channel Channel to extract. - * @param[out] output Destination tensor. Must be of U8 format. - */ - void configure(const ICLTensor *input, Channel channel, ICLTensor *output); - /** Set the input and output of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. 
Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 - * @param[in] channel Channel to extract. - * @param[out] output Destination tensor. Must be of U8 format. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, Channel channel, ICLTensor *output); - /** Set the input and output of the kernel - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444 - * @param[in] channel Channel to extract. - * @param[out] output Single-planar 2D destination image. Must be of U8 format. - */ - void configure(const ICLMultiImage *input, Channel channel, ICLImage *output); - /** Set the input and output of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444 - * @param[in] channel Channel to extract. - * @param[out] output Single-planar 2D destination image. Must be of U8 format. - */ - void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, Channel channel, ICLImage *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - uint32_t _num_elems_processed_per_iteration; - uint32_t _subsampling; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h b/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h deleted file mode 100644 index bf58525248..0000000000 --- a/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
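A short sketch of the single-planar channel-extract path, assuming the pre-move header path and an interleaved RGB888 source; the multi-planar overloads differ only in taking an ICLMultiImage:

#include "arm_compute/core/CL/kernels/CLChannelExtractKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    CLTensor rgb, red;
    rgb.allocator()->init(TensorInfo(128U, 128U, Format::RGB888)); // interleaved single-plane source
    red.allocator()->init(TensorInfo(128U, 128U, Format::U8));     // extracted channel

    CLChannelExtractKernel extract;
    extract.configure(&rgb, Channel::R, &red);

    rgb.allocator()->allocate();
    red.allocator()->allocate();
    CLScheduler::get().enqueue(extract);
    return 0;
}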
- */ -#ifndef ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H -#define ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the channel shuffle kernel */ -class CLChannelShuffleLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLChannelShuffleLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLChannelShuffleLayerKernel(const CLChannelShuffleLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLChannelShuffleLayerKernel &operator=(const CLChannelShuffleLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLChannelShuffleLayerKernel(CLChannelShuffleLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLChannelShuffleLayerKernel &operator=(CLChannelShuffleLayerKernel &&) = default; - /** Default destructor */ - ~CLChannelShuffleLayerKernel() = default; - /** Configure function's inputs and outputs. - * - * @param[in] input Input tensor. Data types supported: All. - * @param[out] output Output tensor. Data type supported: Same as @p input - * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. - */ - void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_groups); - /** Configure function's inputs and outputs. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[out] output Output tensor. Data type supported: Same as @p input - * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int num_groups); - /** Static function to check if given info will lead to a valid configuration of @ref CLChannelShuffleLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] output Output tensor info. Data type supported: Same as @p input - * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLCol2ImKernel.h b/arm_compute/core/CL/kernels/CLCol2ImKernel.h deleted file mode 100644 index c3a1ff3a50..0000000000 --- a/arm_compute/core/CL/kernels/CLCol2ImKernel.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. 
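// Illustrative usage sketch for the CLChannelShuffleLayerKernel interface documented above, assuming
// the pre-move public include paths and the usual CLTensor/CLScheduler runtime setup; the tensor
// names, shapes and group count are hypothetical.
#include "arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

void channel_shuffle_sketch()
{
    using namespace arm_compute;

    CLScheduler::get().default_init();

    // NCHW tensor of shape [W=4, H=4, C=8]; the 8 channels are shuffled in 2 groups.
    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(4U, 4U, 8U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(4U, 4U, 8U), 1, DataType::F32));

    CLChannelShuffleLayerKernel shuffle;
    shuffle.configure(&src, &dst, 2 /* num_groups */);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    // Fill src here, then run the kernel over its default execution window.
    CLScheduler::get().enqueue(shuffle);
    CLScheduler::get().sync();
}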
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCOL2IMKERNEL_H -#define ARM_COMPUTE_CLCOL2IMKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the col2im reshaping kernel. - * - * Rearranges each matrix column into image blocks. It's the inverse operation of @ref CLIm2ColKernel. - * - * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3: - * - * @f[ - * \left( \begin{array}{ccccccccc} - * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccc} - * a0 & a1 & a2 \\ - * a3 & a4 & a5 \\ - * a6 & a7 & a8 \\ - * \end{array} \right) - * @f] - */ -class CLCol2ImKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLCol2ImKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLCol2ImKernel(const CLCol2ImKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLCol2ImKernel &operator=(const CLCol2ImKernel &) = delete; - /** Allow instances of this class to be moved */ - CLCol2ImKernel(CLCol2ImKernel &&) = default; - /** Allow instances of this class to be moved */ - CLCol2ImKernel &operator=(CLCol2ImKernel &&) = default; - /** Default destructor */ - ~CLCol2ImKernel() = default; - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], - * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW - * @param[in] convolved_dims Output convolved dimensions. - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution - */ - void configure(const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups = 1); - /** Set the input and output of the kernel. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], - * while the rest represent batch of outputs. Data types supported: Same as @p input. 
Data layout: NCHW - * @param[in] convolved_dims Output convolved dimensions. - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups = 1); - /** Static function to check if given info will lead to a valid configuration of @ref CLCol2ImKernel - * - * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], - * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW - * @param[in] convolved_dims Output convolved dimensions. - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims, unsigned int num_groups = 1); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -public: - const ICLTensor *_input; - ICLTensor *_output; - Size2D _convolved_dims; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLCOL2IMKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLColorConvertKernel.h b/arm_compute/core/CL/kernels/CLColorConvertKernel.h deleted file mode 100644 index d57bb3de03..0000000000 --- a/arm_compute/core/CL/kernels/CLColorConvertKernel.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCOLORCONVERTKERNEL_H -#define ARM_COMPUTE_CLCOLORCONVERTKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLMultiImage; -class ICLTensor; -using ICLImage = ICLTensor; - -/** Interface for the color convert kernel. - * - */ -class CLColorConvertKernel : public ICLKernel -{ -public: - /** Default constructor. 
 */ - CLColorConvertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLColorConvertKernel(const CLColorConvertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLColorConvertKernel &operator=(const CLColorConvertKernel &) = delete; - /** Allow instances of this class to be moved */ - CLColorConvertKernel(CLColorConvertKernel &&) = default; - /** Allow instances of this class to be moved */ - CLColorConvertKernel &operator=(CLColorConvertKernel &&) = default; - /** Default destructor. */ - ~CLColorConvertKernel() = default; - - /** Set the input and output of the kernel - * - * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 - * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), - * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888), - * U8 (if the format of @p input is RGB888) - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Set the input and output of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 - * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), - * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888), - * U8 (if the format of @p input is RGB888) - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - /** Set the input and output of the kernel - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888 - */ - void configure(const ICLMultiImage *input, ICLImage *output); - /** Set the input and output of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888 - */ - void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLImage *output); - /** Set the input and output of the kernel - * - * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 - * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGBA8888) - */ - void configure(const ICLImage *input, ICLMultiImage *output); - /** Set the input and output of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 - * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGBA8888) - */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLMultiImage *output); - /** Set the input and output of the kernel - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Multi-planar destination image.
Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) - */ - void configure(const ICLMultiImage *input, ICLMultiImage *output); - /** Set the input and output of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) - */ - void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLMultiImage *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /*pointer to single planar tensor input */ - ICLTensor *_output; /*pointer to single planar tensor output */ - const ICLMultiImage *_multi_input; /*pointer to multi-planar input */ - ICLMultiImage *_multi_output; /*pointer to multi-planar output */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLCOLORCONVERTKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLComparisonKernel.h b/arm_compute/core/CL/kernels/CLComparisonKernel.h deleted file mode 100644 index bbf5f19e2f..0000000000 --- a/arm_compute/core/CL/kernels/CLComparisonKernel.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCOMPARISONKERNEL_H -#define ARM_COMPUTE_CLCOMPARISONKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ICLTensor; - -/** Interface for the comparison kernel. */ -class CLComparisonKernel : public ICLKernel -{ -public: - /** Default constructor. 
*/ - CLComparisonKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLComparisonKernel(const CLComparisonKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLComparisonKernel &operator=(const CLComparisonKernel &) = delete; - /** Allow instances of this class to be moved */ - CLComparisonKernel(CLComparisonKernel &&) = default; - /** Allow instances of this class to be moved */ - CLComparisonKernel &operator=(CLComparisonKernel &&) = default; - /** Default destructor */ - ~CLComparisonKernel() = default; - /** Set the inputs and output tensors - * - * @param[in] input1 Source tensor. Data types supported: All. - * @param[in] input2 Source tensor. Data types supported: Same as @p input1. - * @param[out] output Destination tensor. Data types supported: U8. - * @param[in] operation Comparison operation to use. - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation); - /** Set the inputs and output tensors - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 Source tensor. Data types supported: All. - * @param[in] input2 Source tensor. Data types supported: Same as @p input1. - * @param[out] output Destination tensor. Data types supported: U8. - * @param[in] operation Comparison operation to use. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation); - /** Static function to check if given info will lead to a valid configuration of @ref CLComparisonKernel - * - * @param[in] input1 Source tensor. Data types supported: All. - * @param[in] input2 Source tensor. Data types supported: Same as @p input1. - * @param[in] output Destination tensor. Data types supported: U8. - * @param[in] operation Comparison operation to use. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation operation); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input1; /**< Source tensor 1 */ - const ICLTensor *_input2; /**< Source tensor 2 */ - ICLTensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLCOMPARISONKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h b/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h deleted file mode 100644 index 5d9e9bdd85..0000000000 --- a/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
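// Illustrative usage sketch for the CLComparisonKernel interface documented above, assuming the
// pre-move public include paths and the usual CLTensor/CLScheduler runtime setup; the tensor names
// and shapes are hypothetical.
#include "arm_compute/core/CL/kernels/CLComparisonKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

void comparison_sketch()
{
    using namespace arm_compute;

    CLScheduler::get().default_init();

    CLTensor lhs, rhs, mask;
    lhs.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
    rhs.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
    mask.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U8)); // output is always U8

    // Element-wise lhs > rhs; validate() can be called first with the tensor infos to check the setup.
    CLComparisonKernel cmp;
    cmp.configure(&lhs, &rhs, &mask, ComparisonOperation::Greater);

    lhs.allocator()->allocate();
    rhs.allocator()->allocate();
    mask.allocator()->allocate();

    CLScheduler::get().enqueue(cmp);
}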
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H -#define ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface to convert the 2D Fully Connected weights from NCHW to NHWC or vice versa. - * - * @note This function can be applied to the 2D weights used by a Fully Connected layer if: - * - It follows a Convolution layer - * - The data layout used by the network does not match the one the model has been trained in. - * - * @note This function assumes the weights are already reshaped (transposed) - */ -class CLConvertFullyConnectedWeightsKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLConvertFullyConnectedWeightsKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLConvertFullyConnectedWeightsKernel(const CLConvertFullyConnectedWeightsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLConvertFullyConnectedWeightsKernel &operator=(const CLConvertFullyConnectedWeightsKernel &) = delete; - /** Allow instances of this class to be moved */ - CLConvertFullyConnectedWeightsKernel(CLConvertFullyConnectedWeightsKernel &&) = default; - /** Allow instances of this class to be moved */ - CLConvertFullyConnectedWeightsKernel &operator=(CLConvertFullyConnectedWeightsKernel &&) = default; - /** Default destructor */ - ~CLConvertFullyConnectedWeightsKernel() = default; - /** Set the input and output tensor. - * - * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. - * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. - * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). - * @param[in] data_layout The data layout the weights have been trained in. - */ - void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout); - /** Set the input and output tensor. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. - * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. - * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). - * @param[in] data_layout The data layout the weights have been trained in. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout); - /** Static function to check if given info will lead to a valid configuration of @ref CLConvertFullyConnectedWeightsKernel - * - * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All. - * @param[in] output The converted weights tensor info. 
Shape and Data Type: Same as @p input. - * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). - * @param[in] data_layout The data layout the weights have been trained in. - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLConvolutionKernel.h b/arm_compute/core/CL/kernels/CLConvolutionKernel.h deleted file mode 100644 index 0f500fb63a..0000000000 --- a/arm_compute/core/CL/kernels/CLConvolutionKernel.h +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCONVOLUTIONKERNEL_H -#define ARM_COMPUTE_CLCONVOLUTIONKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -#include <cstdint> - -namespace arm_compute -{ -class ICLTensor; - -/****************************************************************************************\ - * Square Convolution * -\****************************************************************************************/ - -/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9). - * The client can supply a convolution matrix \f$ C_{m,n} \f$. - * @f{eqnarray}{ - * k_0 &=& \frac{m}{2} \\ - * l_0 &=& \frac{n}{2} \\ - * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l} - * @f} - * - * @note The above equation for this function is similar to the default OpenCV Filter2D function, - * which actually computes a correlation and not a convolution. - * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically. - */ -template <unsigned int matrix_size> -class CLConvolutionKernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] scale Scale of the convolution matrix.
If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; - -/** Interface for the kernel which applies a 3x3 convolution to a tensor. */ -using CLConvolution3x3Kernel = CLConvolutionKernel<3>; -/** Interface for the kernel which applies a 5x5 convolution to a tensor. */ -using CLConvolution5x5Kernel = CLConvolutionKernel<5>; -/** Interface for the kernel which applies a 7x7 convolution to a tensor. */ -using CLConvolution7x7Kernel = CLConvolutionKernel<7>; -/** Interface for the kernel which applies a 9x9 convolution to a tensor. */ -using CLConvolution9x9Kernel = CLConvolutionKernel<9>; - -/****************************************************************************************\ - * Separable Square Convolution * -\****************************************************************************************/ - -/** Kernel for the Horizontal pass of a Separable Convolution. Currently supports 5x5, 7x7, 9x9 */ -template <unsigned int matrix_size> -class CLSeparableConvolutionHorKernel : public ICLSimple2DKernel -{ -public: - /** Default Constructor */ - CLSeparableConvolutionHorKernel(); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U16/S16/S32. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U16/S16/S32. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; - -private: - BorderSize _border_size; /**< Border size */ -}; - -/** Interface for the kernel which applies a horizontal pass of 5x5 convolution to a tensor.
 */ -using CLSeparableConvolution5x5HorKernel = CLSeparableConvolutionHorKernel<5>; -/** Interface for the kernel which applies a horizontal pass of 7x7 convolution to a tensor. */ -using CLSeparableConvolution7x7HorKernel = CLSeparableConvolutionHorKernel<7>; -/** Interface for the kernel which applies a horizontal pass of 9x9 convolution to a tensor. */ -using CLSeparableConvolution9x9HorKernel = CLSeparableConvolutionHorKernel<9>; - -/** Kernel for the Vertical pass of a Separable Convolution. Currently supports 5x5, 7x7, 9x9 */ -template <unsigned int matrix_size> -class CLSeparableConvolutionVertKernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data types supported: U16/S16/S32. - * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] scale Scale of the convolution matrix. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - * @param[in] data_type Data type to use for intermediate result. @sa data_type_for_convolution - */ - void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U16/S16/S32. - * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] scale Scale of the convolution matrix. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - * @param[in] data_type Data type to use for intermediate result. @sa data_type_for_convolution - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; - -/** Interface for the kernel which applies a vertical pass of 5x5 convolution to a tensor. */ -using CLSeparableConvolution5x5VertKernel = CLSeparableConvolutionVertKernel<5>; -/** Interface for the kernel which applies a vertical pass of 7x7 convolution to a tensor. */ -using CLSeparableConvolution7x7VertKernel = CLSeparableConvolutionVertKernel<7>; -/** Interface for the kernel which applies a vertical pass of 9x9 convolution to a tensor. */ -using CLSeparableConvolution9x9VertKernel = CLSeparableConvolutionVertKernel<9>; - -/****************************************************************************************\ - * Rectangle Convolution * -\****************************************************************************************/ - -/** Kernel for running the convolution on a rectangle matrix. - * - * @note Supports combinations of 3,5,7 and 9.
- */ -class CLConvolutionRectangleKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLConvolutionRectangleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLConvolutionRectangleKernel(const CLConvolutionRectangleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLConvolutionRectangleKernel &operator=(const CLConvolutionRectangleKernel &) = delete; - /** Allow instances of this class to be moved */ - CLConvolutionRectangleKernel(CLConvolutionRectangleKernel &&) = default; - /** Allow instances of this class to be moved */ - CLConvolutionRectangleKernel &operator=(CLConvolutionRectangleKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] width Width of convolution matrix (Number of columns) - * @param[in] height Height of convolution matrix (Number of rows) - * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] width Width of convolution matrix (Number of columns) - * @param[in] height Height of convolution matrix (Number of rows) - * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - BorderSize _border_size; - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLCONVOLUTIONKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLCopyKernel.h b/arm_compute/core/CL/kernels/CLCopyKernel.h deleted file mode 100644 index 5c91e27935..0000000000 --- a/arm_compute/core/CL/kernels/CLCopyKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. 
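// Illustrative usage sketch for the square convolution kernels above (here the 3x3 variant), assuming
// the pre-move public include paths and the usual CLTensor/CLScheduler runtime setup; the filter
// coefficients and tensor sizes are hypothetical.
#include <cstdint>

#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

void convolution3x3_sketch()
{
    using namespace arm_compute;

    CLScheduler::get().default_init();

    CLTensor src, dst;
    src.allocator()->init(TensorInfo(64U, 64U, Format::U8));
    dst.allocator()->init(TensorInfo(64U, 64U, Format::U8));

    // 3x3 Gaussian-like matrix; scale 0 lets the kernel use the sum of the coefficients (16 here).
    const int16_t conv[9] = { 1, 2, 1, 2, 4, 2, 1, 2, 1 };

    CLConvolution3x3Kernel conv3x3;
    conv3x3.configure(&src, &dst, conv, 0 /* scale */, true /* border_undefined */);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    // With border_undefined = true the border pixels of dst are left undefined;
    // otherwise the border of src would normally be filled first (e.g. with CLFillBorderKernel).
    CLScheduler::get().enqueue(conv3x3);
}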
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCOPYKERNEL_H -#define ARM_COMPUTE_CLCOPYKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform a copy between two tensors */ -class CLCopyKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLCopyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLCopyKernel(const CLCopyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLCopyKernel &operator=(const CLCopyKernel &) = delete; - /** Allow instances of this class to be moved */ - CLCopyKernel(CLCopyKernel &&) = default; - /** Allow instances of this class to be moved */ - CLCopyKernel &operator=(CLCopyKernel &&) = default; - /** Initialize the kernel's input, output. - * - * @param[in] input Source tensor. Data types supported: All. - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. - */ - void configure(const ICLTensor *input, ICLTensor *output, Window *output_window = nullptr); - /** Initialize the kernel's input, output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: All. - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, Window *output_window = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref CLCopyKernel - * - * @param[in] input Source tensor info. Data types supported: All. - * @param[in] output Destination tensor info. Data types supported: same as @p input. - * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. 
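// Illustrative usage sketch for the CLCopyKernel interface above, assuming the pre-move public
// include paths and the usual CLTensor/CLScheduler runtime setup; names and shapes are hypothetical.
#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

void copy_sketch()
{
    using namespace arm_compute;

    CLScheduler::get().default_init();

    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::F16));
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::F16));

    // Whole-tensor copy; passing an output_window instead would copy into a sub-region of dst.
    CLCopyKernel copy;
    copy.configure(&src, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    CLScheduler::get().enqueue(copy);
}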
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, Window *output_window = nullptr); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - Window _output_window; - bool _has_output_window; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLCOPYKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLCropKernel.h b/arm_compute/core/CL/kernels/CLCropKernel.h deleted file mode 100644 index 91d70e6c1b..0000000000 --- a/arm_compute/core/CL/kernels/CLCropKernel.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCROPKERNEL_H -#define ARM_COMPUTE_CLCROPKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform a copy between two tensors */ -class CLCropKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLCropKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLCropKernel(const CLCropKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLCropKernel &operator=(const CLCropKernel &) = delete; - /** Allow instances of this class to be moved */ - CLCropKernel(CLCropKernel &&) = default; - /** Allow instances of this class to be moved */ - CLCropKernel &operator=(CLCropKernel &&) = default; - /** Configure kernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] input Source tensor. Data type supported: All. Data layouts supported: NHWC. - * @param[out] output Destination tensor. Data type supported: F32 - * @param[in] start Coordinates of where to start cropping the image. - * @param[in] end Coordinates of where to end cropping the image. - * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input. - * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. - * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr. 
- */ - void configure(const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, Window *output_window = nullptr); - /** Configure kernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data type supported: All. Data layouts supported: NHWC. - * @param[out] output Destination tensor. Data type supported: F32 - * @param[in] start Coordinates of where to start cropping the image. - * @param[in] end Coordinates of where to end cropping the image. - * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input. - * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. - * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, - Window *output_window = nullptr); - - /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] input Source tensor info. Data type supported: All. Data layouts supported: NHWC. - * @param[in] output Destination tensor info. Data type supported: F32 - * @param[in] start Coordinates of where to start cropping the image. - * @param[in] end Coordinates of where to end cropping the image. - * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input. - * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. - * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr. - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, - Window *output_window = nullptr); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - Coordinates2D _start; - uint32_t _batch_index; - float _extrapolation_value; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLCROPKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h deleted file mode 100644 index 84265a2329..0000000000 --- a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
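// Illustrative usage sketch for the CLCropKernel interface above, assuming the pre-move public
// include paths and the usual CLTensor/CLScheduler runtime setup; the coordinates, shapes and the
// assumption that the crop box is inclusive (so (5,5)-(14,14) yields a 10x10 region) are hypothetical.
#include "arm_compute/core/CL/kernels/CLCropKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

void crop_sketch()
{
    using namespace arm_compute;

    CLScheduler::get().default_init();

    // NHWC batch of two 32x32 RGB images: shape is [C=3, W=32, H=32, N=2].
    CLTensor src, dst;
    TensorInfo src_info(TensorShape(3U, 32U, 32U, 2U), 1, DataType::U8);
    src_info.set_data_layout(DataLayout::NHWC);
    src.allocator()->init(src_info);

    // Cropped single image; the kernel always produces F32 output.
    TensorInfo dst_info(TensorShape(3U, 10U, 10U), 1, DataType::F32);
    dst_info.set_data_layout(DataLayout::NHWC);
    dst.allocator()->init(dst_info);

    const Coordinates2D start{ 5, 5 };
    const Coordinates2D end{ 14, 14 };

    // Crop the second image of the batch (batch_index = 1), padding out-of-image reads with 0.
    CLCropKernel crop;
    crop.configure(&src, &dst, start, end, 1 /* batch_index */, 0.f /* extrapolation_value */);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    CLScheduler::get().enqueue(crop);
}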
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H -#define ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the Deconvolution layer kernel on OpenCL. - */ -class CLDeconvolutionLayerUpsampleKernel : public ICLKernel -{ -public: - /** Constructor */ - CLDeconvolutionLayerUpsampleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDeconvolutionLayerUpsampleKernel(const CLDeconvolutionLayerUpsampleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDeconvolutionLayerUpsampleKernel &operator=(const CLDeconvolutionLayerUpsampleKernel &) = delete; - /** Default Move Constructor. */ - CLDeconvolutionLayerUpsampleKernel(CLDeconvolutionLayerUpsampleKernel &&) = default; - /** Default move assignment operator */ - CLDeconvolutionLayerUpsampleKernel &operator=(CLDeconvolutionLayerUpsampleKernel &&) = default; - /** Default destructor */ - ~CLDeconvolutionLayerUpsampleKernel() = default; - - /** Initialise the kernel's input and output. - * - * @param[in] input Source tensor. Data types supported: All. - * @param[out] output Destination tensor. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. - * @param[in] info Contains padding and stride information described in @ref PadStrideInfo. - */ - void configure(const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: All. - * @param[out] output Destination tensor. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. - * @param[in] info Contains padding and stride information described in @ref PadStrideInfo. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample - * - * @param[in] input Source tensor info. Data types supported: All. - * @param[in] output Destination tensor info. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. - * @param[in] info Contains padding and stride information described in @ref PadStrideInfo. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PadStrideInfo &info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - PadStrideInfo _info; - DataLayout _data_layout; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h b/arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h deleted file mode 100644 index 688c943593..0000000000 --- a/arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H -#define ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H - -#include "arm_compute/core/CL/ICLSimpleKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the OpenCL kernel to be used for reshaping the tensor before returning the result of deconvolution. - * - * The input tensor to this OpenCL kernel is expected to be the result of a @ref CLGEMM operation between the Deconvolution input and the Deconvolution filter. - * - * The input tensor should have the following shape: [filter_width * filter_height * ofms, width, height, batch_size] - * - * The output tensor should have the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size] - * - * For example, given a tensor with dimensions [4, 2, 2] this function returns a tensor with dimensions [1, 4, 4]. 
- * - */ -class CLDeconvolutionReshapeOutputKernel : public ICLSimpleKernel -{ -public: - /** Default constructor */ - CLDeconvolutionReshapeOutputKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDeconvolutionReshapeOutputKernel(const CLDeconvolutionReshapeOutputKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDeconvolutionReshapeOutputKernel &operator=(const CLDeconvolutionReshapeOutputKernel &) = delete; - /** Allow instances of this class to be moved */ - CLDeconvolutionReshapeOutputKernel(CLDeconvolutionReshapeOutputKernel &&) = default; - /** Allow instances of this class to be moved */ - CLDeconvolutionReshapeOutputKernel &operator=(CLDeconvolutionReshapeOutputKernel &&) = default; - /** Default destructor */ - ~CLDeconvolutionReshapeOutputKernel() = default; - - /** Initialise the kernel's source and destination. - * - * @param[in] input Input tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. - * @param[in] bias Bias tensor to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[out] output Output tensor with the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size] - * Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[in] input_info Deconvolution input tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[in] weights_info Deconvolution weights tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported. - */ - void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info); - /** Initialise the kernel's source and destination. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. - * @param[in] bias Bias tensor to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[out] output Output tensor with the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size] - * Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[in] input_info Deconvolution input tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[in] weights_info Deconvolution weights tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, - const PadStrideInfo &deconv_info); - - /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionReshapeOutputKernel. - * - * @param[in] input GEMM output tensor info to be reshaped. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. - * @param[in] bias (Optional) Optional bias tensor info to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[in] output Reshaped output tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[in] input_info Original input tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[in] weights_info Original weights tensor info output. Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported. - * - * @return a Status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - bool _add_bias; - const ICLTensor *_bias; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h deleted file mode 100644 index d8493bc5d8..0000000000 --- a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H -#define ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -/** Interface for the depth concatenate kernel. 
- * The input tensor will be concatenated into the output tensor. - */ -class CLDepthConcatenateLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLDepthConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthConcatenateLayerKernel(const CLDepthConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthConcatenateLayerKernel &operator=(const CLDepthConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLDepthConcatenateLayerKernel(CLDepthConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLDepthConcatenateLayerKernel &operator=(CLDepthConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~CLDepthConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] depth_offset The offset on the Z axis. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - * @note: The output tensor's low two dimensions can't be smaller than the input one's. - * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. - * - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] depth_offset The offset on the Z axis. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; - -private: - unsigned int _depth_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h deleted file mode 100644 index 7f9696d835..0000000000 --- a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H -#define ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H - -#include "arm_compute/core/CL/ICLSimple3DKernel.h" -#include "arm_compute/core/Types.h" - -#include - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the depth conversion kernel. */ -class CLDepthConvertLayerKernel : public ICLSimple3DKernel -{ -public: - /** Set the input and output of the kernel. - * - * Valid conversions Input -> Output : - * - * - QSYMM8_PER_CHANNEL -> QASYMM8 (ATTENTION: it is the user's responsibility to keep track of the quantization info in the TensorInfo meta-data) - * - U8 -> S8, U16, S16, U32, S32, F16, F32 - * - U16 -> U8, S8, S16, U32, S32, F16, F32 - * - S16 -> U8, S8, U16, U32, S32, F16, F32 - * - U32 -> U8, S8, U16, S16, S32, F16, F32 - * - S32 -> U8, S8, U16, S16, U32, F16, F32 - * - F16 -> U8, S8, U16, S16, U32, F32 - * - F32 -> U8, S8, U16, S16, U32, F16 - * - * @param[in] input The input tensor to convert. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32. - * @param[out] output The output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. - * @param[in] policy Conversion policy - * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. - */ - void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift); - /** Set the input and output of the kernel. - * - * Valid conversions Input -> Output : - * - * - QSYMM8_PER_CHANNEL -> QASYMM8 (ATTENTION: it is the user's responsibility to keep track of the quantization info in the TensorInfo meta-data) - * - U8 -> S8, U16, S16, U32, S32, F16, F32 - * - U16 -> U8, S8, S16, U32, S32, F16, F32 - * - S16 -> U8, S8, U16, U32, S32, F16, F32 - * - U32 -> U8, S8, U16, S16, S32, F16, F32 - * - S32 -> U8, S8, U16, S16, U32, F16, F32 - * - F16 -> U8, S8, U16, S16, U32, F32 - * - F32 -> U8, S8, U16, S16, U32, F16 - * - * @param[in] compile_context The compile context to be used. - * @param[in] input The input tensor to convert. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32. - * @param[out] output The output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. - * @param[in] policy Conversion policy - * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift); - /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConvertLayerKernel - * - * @param[in] input Source tensor info. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32. - * @param[in] output Destination tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. - * @param[in] policy Conversion policy - * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h deleted file mode 100644 index 1bd1e8e763..0000000000 --- a/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H -#define ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the depth to space kernel */ -class CLDepthToSpaceLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLDepthToSpaceLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthToSpaceLayerKernel(const CLDepthToSpaceLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthToSpaceLayerKernel &operator=(const CLDepthToSpaceLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLDepthToSpaceLayerKernel(CLDepthToSpaceLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLDepthToSpaceLayerKernel &operator=(CLDepthToSpaceLayerKernel &&) = default; - /** Default destructor */ - ~CLDepthToSpaceLayerKernel() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[out] output Tensor output. Data types supported: same as @p input - * @param[in] block_shape Block shape value. - */ - void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape); - /** Initialise the kernel's inputs and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[out] output Tensor output. Data types supported: same as @p input - * @param[in] block_shape Block shape value. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape); - /** Static function to check if given info will lead to a valid configuration of @ref CLDepthToSpaceLayerKernel. - * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. - * @param[in] output Tensor output info. Data types supported: same as @p input - * @param[in] block_shape Block shape value. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - ICLTensor *_output; /**< Destination tensor */ - int32_t _block_shape; /**< Block shape */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h deleted file mode 100644 index 93e7e374b0..0000000000 --- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H -#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H - -#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor when the data layout is NCHW. - */ -class CLDepthwiseConvolutionLayer3x3NCHWKernel : public ICLDepthwiseConvolutionLayer3x3Kernel -{ -public: - /** Default constructor */ - CLDepthwiseConvolutionLayer3x3NCHWKernel(); - /** Initialize the function's source, destination, conv and border_size. - * - * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. 
- * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - */ - void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), - const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override; - /** Initialize the function's source, destination, conv and border_size. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). 
Supported data types: S32 - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), - const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override; - /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NCHWKernel - * - * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor info. A 3D tensor with dimensions [3, 3, IFM]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[in] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. - * @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] output_multipliers (Optional) Output multipliers tensor info for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD, - const Size2D &dilation = Size2D(1U, 1U), const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr); - - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - unsigned int _conv_stride_x; - unsigned int _conv_pad_top; - unsigned int _conv_pad_left; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H */ diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h deleted file mode 100644 index db57439de0..0000000000 --- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H -#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H - -#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor when the data layout is NHWC. - */ -class CLDepthwiseConvolutionLayer3x3NHWCKernel : public ICLDepthwiseConvolutionLayer3x3Kernel -{ -public: - /** Default constructor */ - CLDepthwiseConvolutionLayer3x3NHWCKernel(); - /** Default move assignment operator. */ - /** Initialize the function's source, destination, conv and border_size. - * - * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). 
Supported data types: S32 - */ - void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), - const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override; - /** Initialize the function's source, destination, conv and border_size. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), - const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override; - /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel - * - * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor info. A 3D tensor with dimensions [IFM, 3, 3]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[in] output Destination tensor info. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] output_multipliers (Optional) Output multipliers tensor info for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), - const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - unsigned int _num_planes_processed_per_iteration; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H */ diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h deleted file mode 100644 index 03a0106cc9..0000000000 --- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H -#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include "arm_compute/core/KernelDescriptors.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run a MxN depthwise convolution. 
M and N are respectively the rows and columns of the filter - This kernel assumes that tensor for the weights is NOT reshaped (Native version) */ -class CLDepthwiseConvolutionLayerNativeKernel : public ICLKernel -{ -public: - /** Default Constructor */ - CLDepthwiseConvolutionLayerNativeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthwiseConvolutionLayerNativeKernel(const CLDepthwiseConvolutionLayerNativeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthwiseConvolutionLayerNativeKernel &operator=(const CLDepthwiseConvolutionLayerNativeKernel &) = delete; - /** Allow instances of this class to be moved */ - CLDepthwiseConvolutionLayerNativeKernel(CLDepthwiseConvolutionLayerNativeKernel &&) = default; - /** Allow instances of this class to be moved */ - CLDepthwiseConvolutionLayerNativeKernel &operator=(CLDepthwiseConvolutionLayerNativeKernel &&) = default; - /** Initialize the function's source, destination and parameters - * - * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC - * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, N, M]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8. - * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread - * @param[in] dwc_info Depthwise convolution layer info - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - */ - void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info, - const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U), - const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); - /** Initialize the function's source, destination and parameters - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC - * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, N, M]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8. - * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. 
Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread - * @param[in] dwc_info Depthwise convolution layer info - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info, - const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U), - const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerNativeKernel - * - * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC - * @param[in] weights Weights tensor info. A 3D tensor with dimensions [IFM, N, M]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8. - * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[in] output Destination tensor info. Data type supported: Same as @p input. - * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread - * @param[in] dwc_info Depthwise convolution layer info - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). 
Supported data types: S32 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const DWCWeightsKernelInfo &dwc_weights_info, - const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U), - const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_weights; - const ICLTensor *_biases; - ICLTensor *_output; - unsigned int _depth_multiplier; - const ICLTensor *_output_multipliers; - const ICLTensor *_output_shifts; - bool _is_quantized; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h deleted file mode 100644 index 51aaf17600..0000000000 --- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H -#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to reshape the weights of depthwise convolution. */ -class CLDepthwiseConvolutionLayerReshapeWeightsKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLDepthwiseConvolutionLayerReshapeWeightsKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthwiseConvolutionLayerReshapeWeightsKernel(const CLDepthwiseConvolutionLayerReshapeWeightsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthwiseConvolutionLayerReshapeWeightsKernel &operator=(const CLDepthwiseConvolutionLayerReshapeWeightsKernel &) = delete; - /** Default Move Constructor. 
*/ - CLDepthwiseConvolutionLayerReshapeWeightsKernel(CLDepthwiseConvolutionLayerReshapeWeightsKernel &&) = default; - /** Default move assignment operator */ - CLDepthwiseConvolutionLayerReshapeWeightsKernel &operator=(CLDepthwiseConvolutionLayerReshapeWeightsKernel &&) = default; - - /** Initialize the function's source and destination. - * - * @param[in] input The input tensor of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC - * @param[out] output The output tensor of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p weights. - * @param[in] info Depthwise convolution information to reshape the input tensor. - */ - void configure(const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info); - /** Initialize the function's source and destination. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input The input tensor of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC - * @param[out] output The output tensor of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p weights. - * @param[in] info Depthwise convolution information to reshape the input tensor. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel - * - * @param[in] input The input tensor info of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC - * @param[in] output The output tensor info of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p weights. - * @param[in] info Depthwise convolution information to reshape the input tensor. - * - * @return a Status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const DepthwiseConvolutionReshapeInfo &info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - - void configure_dot_product(const DepthwiseConvolutionReshapeInfo &info); - void configure_generic(const DepthwiseConvolutionReshapeInfo &info); -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h b/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h deleted file mode 100644 index 7a582da132..0000000000 --- a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the dequantization layer kernel. */ -class CLDequantizationLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLDequantizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDequantizationLayerKernel(const CLDequantizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDequantizationLayerKernel &operator=(const CLDequantizationLayerKernel &) = delete; - /** Default Move Constructor. */ - CLDequantizationLayerKernel(CLDequantizationLayerKernel &&) = default; - /** Default move assignment operator */ - CLDequantizationLayerKernel &operator=(CLDequantizationLayerKernel &&) = default; - /** Default destructor */ - ~CLDequantizationLayerKernel() = default; - /** Set the input, output, min and max. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. - * @param[out] output Destination tensor. Data types supported: F16/F32. - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Set the input, output, min and max. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. - * @param[out] output Destination tensor. Data types supported: F16/F32. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayerKernel - * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. - * @param[in] output Output tensor info. Data types supported: F16/F32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDerivativeKernel.h b/arm_compute/core/CL/kernels/CLDerivativeKernel.h deleted file mode 100644 index b49905a5e6..0000000000 --- a/arm_compute/core/CL/kernels/CLDerivativeKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDERIVATIVEKERNEL_H -#define ARM_COMPUTE_CLDERIVATIVEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the derivative kernel. */ -class CLDerivativeKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLDerivativeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDerivativeKernel(const CLDerivativeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDerivativeKernel &operator=(const CLDerivativeKernel &) = delete; - /** Allow instances of this class to be moved */ - CLDerivativeKernel(CLDerivativeKernel &&) = default; - /** Allow instances of this class to be moved */ - CLDerivativeKernel &operator=(CLDerivativeKernel &&) = default; - /** Default destructor */ - ~CLDerivativeKernel() = default; - /** Initialise the kernel's sources, destination and border - * - * @note At least one of output_x or output_y must be set - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - /** Initialise the kernel's sources, destination and border - * - * @note At least one of output_x or output_y must be set - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input; /**< Input tensor */ - ICLTensor *_output_x; /**< Output tensor - Derivate along the X direction */ - ICLTensor *_output_y; /**< Output tensor - Derivate along the Y direction */ - bool _run_derivative_x; /**< Do we need to run Derivative X ? */ - bool _run_derivative_y; /**< Do we need to run Derivative Y ? */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLDERIVATIVEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDilateKernel.h b/arm_compute/core/CL/kernels/CLDilateKernel.h deleted file mode 100644 index 747f8fa5ca..0000000000 --- a/arm_compute/core/CL/kernels/CLDilateKernel.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDILATEKERNEL_H -#define ARM_COMPUTE_CLDILATEKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the dilate kernel. - * - */ -class CLDilateKernel : public ICLSimple2DKernel -{ -public: - /**Initialise the kernel's input and output. - * - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /**Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLDILATEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h deleted file mode 100644 index 5281a0c306..0000000000 --- a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H -#define ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the direct convolution kernel. - */ -class CLDirectConvolutionLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLDirectConvolutionLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDirectConvolutionLayerKernel(const CLDirectConvolutionLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDirectConvolutionLayerKernel &operator=(const CLDirectConvolutionLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLDirectConvolutionLayerKernel(CLDirectConvolutionLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLDirectConvolutionLayerKernel &operator=(CLDirectConvolutionLayerKernel &&) = default; - /** Default destructor */ - ~CLDirectConvolutionLayerKernel() = default; - /** Set the input, weights, biases and output tensors. - * - * @note: DirectConvolution only works in the following configurations: - * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 - * 3x3 convolution with stride_x = 1/2, stride_y = 1/2 - * 5x5 convolution with stride_x = 1/2, stride_y = 1/2 - * 9x9 convolution with stride_x = 1/2, stride_y = 1/2 - * - * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights Weights tensor. 
Weights are a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * The 3rd dimension must be the same as the input's volume 3rd dimension. - * Data type supported: Same as @p input. - * @param[in] biases Biases tensor. Biases are a 1D tensor with dimension [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type. - * @param[out] output Output tensor. - * The 3rd dimension must be equal to the 4th dimension of the @p weights tensor. Data types supported: Same as @p input. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - */ - void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info); - /** Set the input, weights, biases and output tensors. - * - * @note: DirectConvolution only works in the following configurations: - * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 - * 3x3 convolution with stride_x = 1/2, stride_y = 1/2 - * 5x5 convolution with stride_x = 1/2, stride_y = 1/2 - * 9x9 convolution with stride_x = 1/2, stride_y = 1/2 - * - * @param[in] compile_context The compile context to be used. - * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights Weights tensor. Weights are a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * The 3rd dimension must be the same as the input's volume 3rd dimension. - * Data type supported: Same as @p input. - * @param[in] biases Biases tensor. Biases are a 1D tensor with dimension [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type. - * @param[out] output Output tensor. - * The 3rd dimension must be equal to the 4th dimension of the @p weights tensor. Data types supported: Same as @p input. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLDirectConvolutionLayerKernel - * - * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights Weights tensor. Weights are a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * The 3rd dimension must be the same as the input's volume 3rd dimension. - * Data type supported: Same as @p input. - * @param[in] biases Biases tensor. Biases are a 1D tensor with dimension [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type. - * @param[in] output Output tensor. - * The 3rd dimension must be equal to the 4th dimension of the @p weights tensor. Data types supported: Same as @p input. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] target Target GPU architecture.
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const GPUTarget target); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -public: - const ICLTensor *_input; - const ICLTensor *_biases; - const ICLTensor *_weights; - ICLTensor *_output; - DataLayout _data_layout; - BorderSize _border_size; - int _conv_stride_x; - int _conv_stride_y; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h b/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h deleted file mode 100644 index 82cd953b68..0000000000 --- a/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H -#define ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/CL/ICLSimpleKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -/** Interface for the elementwise unary operator */ -class CLElementWiseUnaryLayerKernel : public ICLKernel -{ -public: - /** Initialise the kernel's inputs, output. - * - * @param[in] input First tensor input info. Data types supported: F16/F32. - * @param[out] output Output tensor info. Data types supported: Same as @p input. - * @param[in] op Element wise unary operation to perform. - */ - void configure(const ITensorInfo *input, ITensorInfo *output, const ElementWiseUnary &op); - /** Initialise the kernel's inputs, output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input First tensor input info. Data types supported: F16/F32. - * @param[out] output Output tensor info. Data types supported: Same as @p input. - * @param[in] op Element wise unary operation to perform. - */ - void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, const ElementWiseUnary &op); - /** Static function to check if given info will lead to a valid configuration of @ref CLElementWiseUnaryLayerKernel - * - * @param[in] input First tensor input info. 
Data types supported: F16/F32. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * @param[in] op Element wise unary operation to perform. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ElementWiseUnary &op); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h b/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h deleted file mode 100644 index 80737cb8eb..0000000000 --- a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H -#define ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for an element-wise operation kernel - * - * Element-wise operation is computed by: - * @f[ output(x,y) = OP(input1(x,y), input2(x,y))@f] - * - */ -class CLElementwiseOperationKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLElementwiseOperationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLElementwiseOperationKernel(const CLElementwiseOperationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLElementwiseOperationKernel &operator=(const CLElementwiseOperationKernel &) = delete; - /** Allow instances of this class to be moved */ - CLElementwiseOperationKernel(CLElementwiseOperationKernel &&) = default; - /** Allow instances of this class to be moved */ - CLElementwiseOperationKernel &operator=(CLElementwiseOperationKernel &&) = default; - /** Default destructor */ - ~CLElementwiseOperationKernel() = default; - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; - -protected: - /** The name of the operation */ - virtual std::string name() = 0; - - /** Initialise the kernel's output. - * - * @param[in] input1 First tensor input info. 
Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * - * @return a pair of Status and Window - */ - virtual std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) = 0; - - /** Generate the build options for the specific kernel - * - * @return a CLBuildOptions struct - */ - virtual CLBuildOptions generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) = 0; - - /** Generate the identifier for tuning - * - * @return a string - */ - virtual std::string generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) = 0; - - /** Common configure function for element-wise operators with no additional options (e.g., Div, Min, Max, SquaredDiff) - * - */ - void configure_common(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output); - /** Common configure function for element-wise operators with no additional options (e.g., Div, Min, Max, SquaredDiff) - * - */ - void configure_common(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output); - - ActivationLayerInfo _act_info; - -private: - const ITensorInfo *_input1; /**< Source tensor info 1 */ - const ITensorInfo *_input2; /**< Source tensor info 2 */ - ITensorInfo *_output; /**< Destination tensor info */ -}; - -/** Saturated arithmetic operation kernel */ -class CLSaturatedArithmeticOperationKernel : public CLElementwiseOperationKernel -{ -public: - CLSaturatedArithmeticOperationKernel() - : CLElementwiseOperationKernel(), _policy(), _op() - { - } - - /** Initialise the kernel's inputs, output and conversion policy. - * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * @param[in] policy Policy to use to handle overflow. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ConvertPolicy &policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Initialise the kernel's inputs, output and conversion policy. - * - * @param[in] compile_context The compile context to be used. - * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * @param[in] policy Policy to use to handle overflow. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */ - void configure(const CLCompileContext &compile_context, ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ConvertPolicy &policy, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); - - /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel - * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * @param[in] policy Policy to use to handle overflow. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * - * @return a Status - */ - static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ConvertPolicy &policy, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); - -protected: - // Inherited methods overridden: - std::string name() override; - std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) override; - CLBuildOptions generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) override; - std::string generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) override; - -private: - ConvertPolicy _policy; - ArithmeticOperation _op; -}; - -class CLArithmeticOperationKernel : public CLElementwiseOperationKernel -{ -public: - CLArithmeticOperationKernel() - : CLElementwiseOperationKernel(), _op() - { - } - - /** Initialise the kernel's inputs and output. - * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Initialise the kernel's inputs and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */ - void configure(const CLCompileContext &compile_context, ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); - - /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel - * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * - * @return a Status - */ - static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - -protected: - // Inherited methods overridden: - std::string name() override; - std::pair validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) override; - CLBuildOptions generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) override; - std::string generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) override; - -private: - ArithmeticOperation _op; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLErodeKernel.h b/arm_compute/core/CL/kernels/CLErodeKernel.h deleted file mode 100644 index 620201d625..0000000000 --- a/arm_compute/core/CL/kernels/CLErodeKernel.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLERODEKERNEL_H -#define ARM_COMPUTE_CLERODEKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the erode kernel. - * - */ -class CLErodeKernel : public ICLSimple2DKernel -{ -public: - /**Initialise the kernel's input and output. - * - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. 
False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /**Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLERODEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h b/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h deleted file mode 100644 index a196c8c64f..0000000000 --- a/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H -#define ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include "arm_compute/core/KernelDescriptors.h" - -namespace arm_compute -{ -// Forward declarations -class ICLTensor; - -/** Interface for the digit reverse operation kernel. */ -class CLFFTDigitReverseKernel : public ICLKernel -{ -public: - /** Constructor */ - CLFFTDigitReverseKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFFTDigitReverseKernel(const CLFFTDigitReverseKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFFTDigitReverseKernel &operator=(const CLFFTDigitReverseKernel &) = delete; - /** Default Move Constructor. */ - CLFFTDigitReverseKernel(CLFFTDigitReverseKernel &&) = default; - /** Default move assignment operator */ - CLFFTDigitReverseKernel &operator=(CLFFTDigitReverseKernel &&) = default; - /** Default destructor */ - ~CLFFTDigitReverseKernel() = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] idx Digit reverse index tensor. 
Data type supported: U32 - * @param[in] config Kernel configuration. - */ - void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] idx Digit reverse index tensor. Data type supported: U32 - * @param[in] config Kernel configuration. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config); - /** Static function to check if given info will lead to a valid configuration of @ref CLFFTDigitReverseKernel - * - * @param[in] input Source tensor info. Data types supported: F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input - * @param[in] idx Digit reverse index tensor info. Data type supported: U32 - * @param[in] config Kernel configuration. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - const ICLTensor *_idx; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h b/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h deleted file mode 100644 index d6d6067bc4..0000000000 --- a/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H -#define ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include "arm_compute/core/KernelDescriptors.h" - -#include - -namespace arm_compute -{ -// Forward declarations -class ICLTensor; - -/** Interface for the FFT radix stage kernel. 
*/ -class CLFFTRadixStageKernel : public ICLKernel -{ -public: - /** Constructor */ - CLFFTRadixStageKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFFTRadixStageKernel(const CLFFTRadixStageKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFFTRadixStageKernel &operator=(const CLFFTRadixStageKernel &) = delete; - /** Default Move Constructor. */ - CLFFTRadixStageKernel(CLFFTRadixStageKernel &&) = default; - /** Default move assignment operator */ - CLFFTRadixStageKernel &operator=(CLFFTRadixStageKernel &&) = default; - /** Default destructor */ - ~CLFFTRadixStageKernel() = default; - /** Set the input and output tensors. - * - * @note If the output tensor is nullptr, the FFT will be performed in-place - * - * @param[in,out] input Source tensor. Data types supported: F32. - * @param[out] output Destination tensor. Can be nullptr. Data type supported: same as @p input - * @param[in] config FFT descriptor metadata. - */ - void configure(ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config); - /** Set the input and output tensors. - * - * @note If the output tensor is nullptr, the FFT will be performed in-place - * - * @param[in] compile_context The compile context to be used. - * @param[in,out] input Source tensor. Data types supported: F32. - * @param[out] output Destination tensor. Can be nullptr. Data type supported: same as @p input - * @param[in] config FFT descriptor metadata. - */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config); - /** Static function to check if given info will lead to a valid configuration of @ref CLFFTRadixStageKernel - * - * @param[in] input Source tensor info. Data types supported: F32. - * @param[in] output Destination tensor info. Can be nullptr. Data type supported: same as @p input - * @param[in] config FFT descriptor metadata. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelInfo &config); - /** Returns the radix values supported by the FFT kernel - * - * @return A set of supported radix values - */ - static std::set<unsigned int> supported_radix(); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_input; - ICLTensor *_output; - bool _run_in_place; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLFFTScaleKernel.h b/arm_compute/core/CL/kernels/CLFFTScaleKernel.h deleted file mode 100644 index c6dd176f58..0000000000 --- a/arm_compute/core/CL/kernels/CLFFTScaleKernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software.
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLFFTSCALEKERNEL_H -#define ARM_COMPUTE_CLFFTSCALEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include "arm_compute/core/KernelDescriptors.h" - -namespace arm_compute -{ -// Forward declarations -class ICLTensor; - -/** Interface for the inverse fft scale kernel. */ -class CLFFTScaleKernel : public ICLKernel -{ -public: - /** Constructor */ - CLFFTScaleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFFTScaleKernel(const CLFFTScaleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFFTScaleKernel &operator=(const CLFFTScaleKernel &) = delete; - /** Default Move Constructor. */ - CLFFTScaleKernel(CLFFTScaleKernel &&) = default; - /** Default move assignment operator */ - CLFFTScaleKernel &operator=(CLFFTScaleKernel &&) = default; - /** Default destructor */ - ~CLFFTScaleKernel() = default; - /** Set the input and output tensors. - * - * @param[in,out] input Source tensor. Data types supported: F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] config Kernel configuration - */ - void configure(ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in,out] input Source tensor. Data types supported: F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] config Kernel configuration - */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config); - /** Static function to check if given info will lead to a valid configuration of @ref CLFFTScaleKernel - * - * @param[in] input Source tensor info. Data types supported: F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input - * @param[in] config Kernel configuration - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTScaleKernelInfo &config); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_input; - ICLTensor *_output; - bool _run_in_place; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLFFTSCALEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLFastCornersKernel.h b/arm_compute/core/CL/kernels/CLFastCornersKernel.h deleted file mode 100644 index 5d0da7d5d5..0000000000 --- a/arm_compute/core/CL/kernels/CLFastCornersKernel.h +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLFASTCORNERSKERNEL_H -#define ARM_COMPUTE_CLFASTCORNERSKERNEL_H - -#include "arm_compute/core/CL/ICLArray.h" -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -#include - -namespace cl -{ -class Buffer; -} - -namespace arm_compute -{ -class ICLTensor; -using ICLImage = ICLTensor; - -/** CL kernel to perform fast corners */ -class CLFastCornersKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLFastCornersKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFastCornersKernel(const CLFastCornersKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFastCornersKernel &operator=(const CLFastCornersKernel &) = delete; - /** Allow instances of this class to be moved */ - CLFastCornersKernel(CLFastCornersKernel &&) = default; - /** Allow instances of this class to be moved */ - CLFastCornersKernel &operator=(CLFastCornersKernel &&) = default; - /** Default destructor */ - ~CLFastCornersKernel() = default; - - /** Initialise the kernel. - * - * @param[in] input Source image. Data types supported: U8. - * @param[out] output Output image. Data types supported: U8. - * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. - * @param[in] non_max_suppression True if non-maxima suppresion is applied, false otherwise. - * @param[in] border_mode Strategy to use for borders. - */ - void configure(const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode); - /** Initialise the kernel. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source image. Data types supported: U8. - * @param[out] output Output image. Data types supported: U8. - * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. - * @param[in] non_max_suppression True if non-maxima suppresion is applied, false otherwise. - * @param[in] border_mode Strategy to use for borders. 
- */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode); - - // Inherited methods overridden - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLImage *_input; - ICLImage *_output; -}; - -/** CL kernel to copy keypoints information to ICLKeyPointArray and counts the number of key points */ -class CLCopyToArrayKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLCopyToArrayKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLCopyToArrayKernel(const CLCopyToArrayKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLCopyToArrayKernel &operator=(const CLCopyToArrayKernel &) = delete; - /** Allow instances of this class to be moved */ - CLCopyToArrayKernel(CLCopyToArrayKernel &&) = default; - /** Allow instances of this class to be moved */ - CLCopyToArrayKernel &operator=(CLCopyToArrayKernel &&) = default; - /** Default destructor */ - ~CLCopyToArrayKernel() = default; - - /** Initialise the kernel. - * - * @param[in] input Source image. Data types supported: U8. - * @param[in] update_number Flag to indicate whether we need to update the number of corners - * @param[out] corners Array of keypoints to store the results. - * @param[out] num_buffers Number of keypoints to store the results. - */ - void configure(const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers); - /** Initialise the kernel. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source image. Data types supported: U8. - * @param[in] update_number Flag to indicate whether we need to update the number of corners - * @param[out] corners Array of keypoints to store the results. - * @param[out] num_buffers Number of keypoints to store the results. - */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLImage *_input; /**< source image */ - ICLKeyPointArray *_corners; /**< destination array */ - cl::Buffer *_num_buffer; /**< CL memory to record number of key points in the array */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLFASTCORNERSKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLFillBorderKernel.h b/arm_compute/core/CL/kernels/CLFillBorderKernel.h deleted file mode 100644 index 5323af4c0e..0000000000 --- a/arm_compute/core/CL/kernels/CLFillBorderKernel.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLFILLBORDERKERNEL_H -#define ARM_COMPUTE_CLFILLBORDERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for filling the border of a kernel */ -class CLFillBorderKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLFillBorderKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFillBorderKernel(const CLFillBorderKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFillBorderKernel &operator=(const CLFillBorderKernel &) = delete; - /** Allow instances of this class to be moved */ - CLFillBorderKernel(CLFillBorderKernel &&) = default; - /** Allow instances of this class to be moved */ - CLFillBorderKernel &operator=(CLFillBorderKernel &&) = default; - /** Default destructor */ - ~CLFillBorderKernel() = default; - - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in,out] tensor Tensor to process Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32. - * @param[in] border_size Size of the border to fill in elements. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(const CLCompileContext &compile_context, ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); - /** Initialise the kernel's input, output and border mode. - * - * @param[in,out] tensor Tensor to process Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32. - * @param[in] border_size Size of the border to fill in elements. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in,out] tensor Tensor to process Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32. - * @param[in] border_size Size of the border to fill in elements. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
- */ - void configure(const CLCompileContext &compile_context, ITensorInfo *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); - - /** Function to set the constant value on fill border kernel depending on type. - * - * @param[in] idx Index of the kernel argument to set. - * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT. - */ - template - void set_constant_border(unsigned int idx, const PixelValue &constant_border_value); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; - void run(const Window &window, cl::CommandQueue &queue) override; - bool is_parallelisable() const override; - -private: - ICLTensor *_tensor; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLFILLBORDERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLFlattenLayerKernel.h b/arm_compute/core/CL/kernels/CLFlattenLayerKernel.h deleted file mode 100644 index 4df0b33c8e..0000000000 --- a/arm_compute/core/CL/kernels/CLFlattenLayerKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLFLATTENLAYERKERNEL_H -#define ARM_COMPUTE_CLFLATTENLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL interface for the flatten kernel.*/ -class CLFlattenLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLFlattenLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFlattenLayerKernel(const CLFlattenLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFlattenLayerKernel &operator=(const CLFlattenLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLFlattenLayerKernel(CLFlattenLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLFlattenLayerKernel &operator=(CLFlattenLayerKernel &&) = default; - /** Set the input and output of the kernel. - * - * @param[in] input First input tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: All. 
- * @param[out] output Output tensor with shape [w*h*d, input_batches] where: - * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Set the input and output of the kernel. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input First input tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: All. - * @param[out] output Output tensor with shape [w*h*d, input_batches] where: - * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLFlattenLayerKernel - * - * @param[in] input First input tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: All. - * @param[out] output Output tensor with shape [w*h*d, input_batches] where: - * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -public: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLFLATTENLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLFloorKernel.h b/arm_compute/core/CL/kernels/CLFloorKernel.h deleted file mode 100644 index 3b1d3f10cc..0000000000 --- a/arm_compute/core/CL/kernels/CLFloorKernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLFLOORKERNEL_H -#define ARM_COMPUTE_CLFLOORKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform a floor operation */ -class CLFloorKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLFloorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFloorKernel(const CLFloorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFloorKernel &operator=(const CLFloorKernel &) = delete; - /** Allow instances of this class to be moved */ - CLFloorKernel(CLFloorKernel &&) = default; - /** Allow instances of this class to be moved */ - CLFloorKernel &operator=(CLFloorKernel &&) = default; - /** Default destructor */ - ~CLFloorKernel() = default; - /** Set the source, destination of the kernel - * - * @param[in] input Source tensor. Data type supported: F16/F32. - * @param[out] output Destination tensor. Same as @p input - */ - void configure(const ICLTensor *input, ICLTensor *output); - - /** Set the source, destination of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data type supported: F16/F32. - * @param[out] output Destination tensor. Same as @p input - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - - /** Static function to check if given info will lead to a valid configuration of @ref CLFloorKernel - * - * @param[in] input Source tensor info. Data type supported: F16/F32. - * @param[in] output Destination tensor info. Same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLFLOORKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h b/arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h deleted file mode 100644 index 3ec251c858..0000000000 --- a/arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H -#define ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ICLTensor; - -/** OpenCL kernel to fuse the batch normalization node to a preceding convolution node */ -class CLFuseBatchNormalizationKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLFuseBatchNormalizationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFuseBatchNormalizationKernel(const CLFuseBatchNormalizationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFuseBatchNormalizationKernel &operator=(const CLFuseBatchNormalizationKernel &) = delete; - /** Allow instances of this class to be moved */ - CLFuseBatchNormalizationKernel(CLFuseBatchNormalizationKernel &&) = default; - /** Allow instances of this class to be moved */ - CLFuseBatchNormalizationKernel &operator=(CLFuseBatchNormalizationKernel &&) = default; - /** Default destructor */ - ~CLFuseBatchNormalizationKernel() = default; - /** Set the source, destination of the kernel - * - * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC - * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights - * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights - * @param[out] fused_weights Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights - * @param[out] fused_bias Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights - * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights - * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights - * @note if nullptr, bn_beta is set to 0.0 - * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights - * @note if nullptr, bn_gamma is set to 1.0 - * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. - * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. - */ - void configure(const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias, - const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr, - float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); - /** Set the source, destination of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC - * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights - * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights - * @param[out] fused_weights Output fused weights tensor. 
It can be a nullptr in case of in-place computation. Same as @p input_weights - * @param[out] fused_bias Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights - * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights - * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights - * @note if nullptr, bn_beta is set to 0.0 - * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights - * @note if nullptr, bn_gamma is set to 1.0 - * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. - * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias, - const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr, - float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); - /** Static function to check if given info will lead to a valid configuration of @ref CLFuseBatchNormalizationKernel - * - * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC - * @param[in] bn_mean Batch normalization layer mean tensor info. Same as @p input_weights - * @param[in] bn_var Batch normalization layer variance tensor info. Same as @p input_weights - * @param[in] fused_weights Output fused weights tensor info. It can be a nullptr in case of in-place computation. Same as @p input_weights - * @param[in] fused_bias Output fused bias tensor info. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights - * @param[in] input_bias (Optional) Input bias tensor info for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights - * @param[in] bn_beta (Optional) Batch normalization layer beta tensor info. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights - * @note if nullptr, bn_beta is set to 0.0 - * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor info. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights - * @note if nullptr, bn_gamma is set to 1.0 - * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. - * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. 
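For reference, a minimal scalar sketch of the batch-normalization folding these parameters describe, assuming the usual fusion formulas fused_w = w * gamma / sqrt(var + eps) and fused_b = (b - mean) * gamma / sqrt(var + eps) + beta. The helper name, the flattened per-OFM weight layout, and the presence of default-filled beta/gamma vectors are illustrative assumptions, not the kernel's actual implementation.

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Scalar sketch only: fold batch-normalization statistics into convolution
    // weights/bias. Weights are flattened into contiguous per-OFM blocks,
    // unlike the kernel's NCHW/NHWC tensors.
    void fuse_bn_reference(std::vector<float>       &weights,  // size = num_ofm * weights_per_ofm
                           std::vector<float>       &bias,     // size = num_ofm; 0-filled if no conv bias
                           const std::vector<float> &bn_mean,
                           const std::vector<float> &bn_var,
                           const std::vector<float> &bn_beta,  // 0-filled if bn_beta is not provided
                           const std::vector<float> &bn_gamma, // 1-filled if bn_gamma is not provided
                           float                     epsilon,
                           std::size_t               weights_per_ofm)
    {
        for(std::size_t ofm = 0; ofm < bias.size(); ++ofm)
        {
            const float scale = bn_gamma[ofm] / std::sqrt(bn_var[ofm] + epsilon);
            for(std::size_t i = 0; i < weights_per_ofm; ++i)
            {
                weights[ofm * weights_per_ofm + i] *= scale;               // -> fused_weights
            }
            bias[ofm] = (bias[ofm] - bn_mean[ofm]) * scale + bn_beta[ofm]; // -> fused_bias
        }
    }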
- * - * @return a status - */ - static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var, - const ITensorInfo *fused_weights, const ITensorInfo *fused_bias, - const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr, - float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input_weights; - const ICLTensor *_input_bias; - const ICLTensor *_bn_mean; - const ICLTensor *_bn_var; - const ICLTensor *_bn_gamma; - const ICLTensor *_bn_beta; - ICLTensor *_fused_weights; - ICLTensor *_fused_bias; - float _epsilon; - bool _run_in_place_weights; - bool _run_in_place_bias; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h deleted file mode 100644 index 2d5e4a3346..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H -#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to multiply matrices with QASYMM8/QASYMM8_SIGNED data type */ -class CLGEMMLowpMatrixMultiplyNativeKernel : public ICLKernel -{ -public: - /** Default Constructor */ - CLGEMMLowpMatrixMultiplyNativeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMLowpMatrixMultiplyNativeKernel(const CLGEMMLowpMatrixMultiplyNativeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMLowpMatrixMultiplyNativeKernel &operator=(const CLGEMMLowpMatrixMultiplyNativeKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMLowpMatrixMultiplyNativeKernel(CLGEMMLowpMatrixMultiplyNativeKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMLowpMatrixMultiplyNativeKernel &operator=(CLGEMMLowpMatrixMultiplyNativeKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 - * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: same as lhs_info.k0 - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - */ - void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 - * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: same as lhs_info.k0 - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - const GEMMReshapeInfo &gemm_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyNativeKernel - * - * @param[in] input0 Input tensor info for the LHS matrix. 
Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] input1 Input tensor info for the RHS matrix. Data type supported: same as @p input0 - * @param[in] output Output tensor info. Data type supported: S32 - * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: same as lhs_info.k0 - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - const GEMMReshapeInfo &gemm_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input0; - const ICLTensor *_input1; - ICLTensor *_output; - bool _slide_matrix_b; - bool _reinterpret_input_as_3d; - bool _reinterpret_output_as_3d; - bool _use_dummy_work_items; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H*/ diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h deleted file mode 100644 index f2eb447834..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
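For orientation, a loose scalar reference of what this group of lowp matrix-multiply kernels computes before any output stage: QASYMM8 operands multiplied and accumulated into S32, with the m0/k0/n0 blocking, the reshaping, and the quantization offsets (handled by the offset contribution kernels further down) all omitted. The function name and the row-major layout are assumptions for illustration only.

    #include <cstdint>
    #include <vector>

    // Scalar reference: dst = lhs * rhs with 8-bit inputs and S32 accumulators.
    std::vector<int32_t> gemmlowp_reference(const std::vector<uint8_t> &lhs, // M x K, row-major
                                            const std::vector<uint8_t> &rhs, // K x N, row-major
                                            int M, int N, int K)
    {
        std::vector<int32_t> dst(M * N, 0);
        for(int m = 0; m < M; ++m)
        {
            for(int n = 0; n < N; ++n)
            {
                int32_t acc = 0;
                for(int k = 0; k < K; ++k)
                {
                    acc += static_cast<int32_t>(lhs[m * K + k]) * static_cast<int32_t>(rhs[k * N + n]);
                }
                dst[m * N + n] = acc; // raw S32 result; offsets and output stage are applied by later kernels
            }
        }
        return dst;
    }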
- */ -#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H -#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to multiply matrices when both the input matrices LHS (input0) and RHS (input1) have been reshaped - * - * @note The input matrices @p input0 and @p input1 must be reshaped through @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel - */ -class CLGEMMLowpMatrixMultiplyReshapedKernel : public ICLKernel -{ -public: - /** Default Constructor */ - CLGEMMLowpMatrixMultiplyReshapedKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMLowpMatrixMultiplyReshapedKernel(const CLGEMMLowpMatrixMultiplyReshapedKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMLowpMatrixMultiplyReshapedKernel &operator=(const CLGEMMLowpMatrixMultiplyReshapedKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMLowpMatrixMultiplyReshapedKernel(CLGEMMLowpMatrixMultiplyReshapedKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMLowpMatrixMultiplyReshapedKernel &operator=(CLGEMMLowpMatrixMultiplyReshapedKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4. - * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 - * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * lhs_info.transpose: false - * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: same as lhs_info.k0 - * rhs_info.transpose: true - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - * - * @note lhs_info.k0 must be equal to rhs_info.k0 - */ - void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4. - * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 - * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. 
Only the following values are supported: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * lhs_info.transpose: false - * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: same as lhs_info.k0 - * rhs_info.transpose: true - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - * - * @note lhs_info.k0 must be equal to rhs_info.k0 - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - const GEMMReshapeInfo &gemm_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyReshapedKernel - * - * @param[in] input0 Input tensor info containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4. - * @param[in] input1 Input tensor info containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. - * @param[in] output Output tensor info. Data type supported: S32 - * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * lhs_info.transpose: false - * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: 2,3,4,8,16 - * rhs_info.transpose: true - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - * - * @note lhs_info.k0 must be equal to rhs_info.k0 - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - const GEMMReshapeInfo &gemm_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input0; - const ICLTensor *_input1; - ICLTensor *_output; - bool _slide_matrix_b; - bool _reinterpret_output_as_3d; - unsigned int _k; - bool _use_dummy_work_items; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H*/ diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h deleted file mode 100644 index a2295143de..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. 
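As a hypothetical illustration of the block-size constraints documented for the reshaped lowp kernels (lhs_info.k0 == rhs_info.k0, lhs_info.transpose == false, rhs_info.transpose == true), assuming the GEMMLHSMatrixInfo/GEMMRHSMatrixInfo members named above and the arm_compute/core/Types.h header; the chosen values are examples only.

    #include "arm_compute/core/Types.h"

    arm_compute::GEMMLHSMatrixInfo make_lhs_info()
    {
        arm_compute::GEMMLHSMatrixInfo lhs_info{};
        lhs_info.m0        = 4;     // allowed: 2,3,4,5,6,7,8
        lhs_info.k0        = 16;    // allowed: 2,3,4,8,16
        lhs_info.transpose = false; // LHS reshape without transposition
        return lhs_info;
    }

    arm_compute::GEMMRHSMatrixInfo make_rhs_info()
    {
        arm_compute::GEMMRHSMatrixInfo rhs_info{};
        rhs_info.n0        = 4;     // allowed: 2,3,4,8,16
        rhs_info.k0        = 16;    // must equal lhs_info.k0
        rhs_info.transpose = true;  // RHS reshape with transposition
        return rhs_info;
    }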
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H -#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/KernelDescriptors.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to multiply matrices with QASYMM8 data type when only the input matrix RHS (input1) has been reshaped - * - * @note The input matrix input1 must be reshaped through @ref CLGEMMReshapeRHSMatrixKernel - * @note For fused output stage, only GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT type is supported - */ -class CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel : public ICLKernel -{ -public: - /** Default Constructor */ - CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel(const CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &operator=(const CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &operator=(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL - * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/S32. - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info. 
- * Only the following values are supported for LHS info: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * Only the following values are supported for RHS info: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: same as lhs_info.k0 - * rhs_info.transpose: true - * @param[in] vector_sum_col (Optional) Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32 - * @param[in] vector_sum_row (Optional) Input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32 - * @param[in] bias (Optional) Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32. - * @param[in] output_multipliers (Optional) Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32. - * @param[in] output_shifts (Optional) Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32. - */ - void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info, const ICLTensor *vector_sum_col = nullptr, - const ICLTensor *vector_sum_row = nullptr, const ICLTensor *bias = nullptr, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0 - * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/S32. - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info. - * Only the following values are supported for LHS info: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * Only the following values are supported for RHS info: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: same as lhs_info.k0 - * rhs_info.transpose: true - * @param[in] vector_sum_col (Optional) Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32 - * @param[in] vector_sum_row (Optional) Input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32 - * @param[in] bias (Optional) Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32. - * @param[in] output_multipliers (Optional) Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32. - * @param[in] output_shifts (Optional) Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info, const ICLTensor *vector_sum_col = nullptr, - const ICLTensor *vector_sum_row = nullptr, const ICLTensor *bias = nullptr, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel - * - * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] input1 Input tensor info for the RHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL - * @param[in] output Output tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/S32. - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info. - * Only the following values are supported for LHS info: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * Only the following values are supported for RHS info: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: same as lhs_info.k0 - * rhs_info.transpose: true - * @param[in] vector_sum_col (Optional) Input row-vector info of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32 - * @param[in] vector_sum_row (Optional) Input row-vector info of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32 - * @param[in] bias (Optional) Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32. - * @param[in] output_multipliers (Optional) Output multipliers tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32. - * @param[in] output_shifts (Optional) Output shifts tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, const GEMMKernelInfo &gemm_info, const ITensorInfo *vector_sum_col = nullptr, - const ITensorInfo *vector_sum_row = nullptr, const ITensorInfo *bias = nullptr, const ITensorInfo *output_multipliers = nullptr, - const ITensorInfo *output_shifts = nullptr); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input0; - const ICLTensor *_input1; - ICLTensor *_output; - const ICLTensor *_vector_sum_col; - const ICLTensor *_vector_sum_row; - const ICLTensor *_bias; - const ICLTensor *_output_multipliers; - const ICLTensor *_output_shifts; - bool _slide_matrix_b; - bool _reinterpret_input_as_3d; - bool _reinterpret_output_as_3d; - bool _use_dummy_work_items; - bool _is_quantized_per_channel; - bool _fuse_output_stage; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H */ \ No newline at end of file diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h deleted file mode 100644 index 1d3b3110b3..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H -#define ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel used to add the offset contribution after the matrix multiplication. The computation is performed in-place - * - * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), - * and adds to it the offset contribution of matrix A and matrix B in-place. 
- * - * The final result is: - * - * mm_result[i][k] = mm_result[i][k] + - * (vector_sum_col[k] * a_offset) + - * (vector_sum_row[i] * b_offset) + - * (a_offset * b_offset * k) - * - */ -class CLGEMMLowpOffsetContributionKernel : public ICLKernel -{ -public: - /** Constructor */ - CLGEMMLowpOffsetContributionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpOffsetContributionKernel(const CLGEMMLowpOffsetContributionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpOffsetContributionKernel &operator=(const CLGEMMLowpOffsetContributionKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMLowpOffsetContributionKernel(CLGEMMLowpOffsetContributionKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMLowpOffsetContributionKernel &operator=(CLGEMMLowpOffsetContributionKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in, out] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32 - * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] k Number of matrix A columns or Matrix B rows - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. - */ - void configure(ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset, int32_t b_offset); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32 - * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] k Number of matrix A columns or Matrix B rows - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. 
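A scalar reference of the in-place offset contribution spelled out above, with i indexing rows of matrix A, j indexing columns of matrix B, and k the accumulation depth; the bias addition and the nullptr shortcuts for a_offset == 0 / b_offset == 0 are omitted, and the helper name is hypothetical.

    #include <cstdint>
    #include <vector>

    // Scalar sketch: add the A/B offset contributions to the S32 GEMM result in-place.
    void offset_contribution_reference(std::vector<int32_t>       &mm_result,      // M x N, row-major
                                       const std::vector<int32_t> &vector_sum_col, // N column sums of matrix B
                                       const std::vector<int32_t> &vector_sum_row, // M row sums of matrix A
                                       int M, int N, int32_t k, int32_t a_offset, int32_t b_offset)
    {
        for(int i = 0; i < M; ++i)
        {
            for(int j = 0; j < N; ++j)
            {
                mm_result[i * N + j] += vector_sum_col[j] * a_offset
                                        + vector_sum_row[i] * b_offset
                                        + a_offset * b_offset * k;
            }
        }
    }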
- */ - void configure(const CLCompileContext &compile_context, ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset, - int32_t b_offset); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel - * - * @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpOffsetContributionKernel. Data type supported: S32 - * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. - * - * @return a status - */ - static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, int32_t a_offset, int32_t b_offset); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_vector_sum_col; - const ICLTensor *_vector_sum_row; - ICLTensor *_mm_result; - const ICLTensor *_bias; -}; -} // namespace arm_compute - -#endif /* ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h deleted file mode 100644 index e3f88c11e6..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H -#define ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel used to add the offset contribution after the matrix multiplication and perform the output stage. - * - * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), adds to it the offset contribution - * of matrix A and matrix B and performs the output stage defined by the output_stage argument - * - * @note For quantized computations the output data type for auto-initialization must be passed as part of the @ref GEMMLowpOutputStageInfo. - */ -class CLGEMMLowpOffsetContributionOutputStageKernel : public ICLKernel -{ -public: - /** Constructor */ - CLGEMMLowpOffsetContributionOutputStageKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpOffsetContributionOutputStageKernel(const CLGEMMLowpOffsetContributionOutputStageKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpOffsetContributionOutputStageKernel &operator=(const CLGEMMLowpOffsetContributionOutputStageKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMLowpOffsetContributionOutputStageKernel(CLGEMMLowpOffsetContributionOutputStageKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMLowpOffsetContributionOutputStageKernel &operator=(CLGEMMLowpOffsetContributionOutputStageKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32 - * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED. - * @param[in] k Number of matrix A columns or Matrix B rows - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. - * @param[in] output_stage GEMMLowp output stage info - * @param[in] output_multipliers Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32 - * @param[in] output_shifts Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32 - */ - void configure(const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output, int32_t k, int32_t a_offset, int32_t b_offset, - const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts); - /** Initialise the kernel's input and output. 
- * - * @param[in] compile_context The compile context to be used. - * @param[in] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32 - * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED. - * @param[in] k Number of matrix A columns or Matrix B rows - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. - * @param[in] output_stage GEMMLowp output stage info - * @param[in] output_multipliers Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32 - * @param[in] output_shifts Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32 - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output, int32_t k, - int32_t a_offset, int32_t b_offset, - const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel - * - * @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpOffsetContributionKernel. Data type supported: S32 - * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED. - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. - * @param[in] output_stage GEMMLowp output stage info - * @param[in] output_multipliers Output multipliers tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32 - * @param[in] output_shifts Output shifts tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). 
- * Supported data types: S32 - * - * @return a status - */ - static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, const ITensorInfo *output, int32_t a_offset, - int32_t b_offset, const GEMMLowpOutputStageInfo &output_stage, const ITensorInfo *output_multipliers, const ITensorInfo *output_shifts); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_mm_result; - const ICLTensor *_vector_sum_col; - const ICLTensor *_vector_sum_row; - const ICLTensor *_bias; - ICLTensor *_output; - const ICLTensor *_output_multipliers; - const ICLTensor *_output_shifts; - bool _is_quantized_per_channel; -}; -} // namespace arm_compute - -#endif /* ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h deleted file mode 100644 index 23040e7bcc..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFIXEDPOINTKERNEL_H -#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFIXEDPOINTKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED/QSYMM16 - * - * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final quantized value. - * The following computations will be performed by the kernel: - * - * -# Compute fixed point multiplication between each entry of input by gemmlowp_multiplier - * -# Add bias to final result if bias tensor is not a nullptr - * -# Round to nearest division by a power-of-two using result_shift - * -# Add offset to each result - * -# Clamp the value between the specified min and max bounds - * -# Clamp the resulting int32 values to the proper quantized range and cast to QASYMM8/QASYMM8_SIGNED/QSYMM16. 
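A per-element scalar sketch of the steps listed above, assuming gemmlowp-style rounding-doubling-high-multiply semantics; the exact rounding, the saturation edge case, and the point at which the bias is added are simplifications, and all names are hypothetical.

    #include <algorithm>
    #include <cstdint>

    // Per-element sketch; the kernel applies this to every S32 accumulator.
    int32_t quantize_down_fixedpoint(int32_t acc, int32_t bias, int32_t gemmlowp_multiplier, int32_t result_shift,
                                     int32_t result_offset, int32_t min_bound, int32_t max_bound)
    {
        acc += bias;                                                              // bias, when a bias tensor is given
        const int64_t prod    = static_cast<int64_t>(acc) * gemmlowp_multiplier;  // fixed-point multiplication
        const int32_t mul_out = static_cast<int32_t>((prod + (1ll << 30)) >> 31); // rounding-doubling high mul (saturation case omitted)
        int32_t res = (result_shift > 0) ? ((mul_out + (1 << (result_shift - 1))) >> result_shift) // round-to-nearest division by power of two
                                         : mul_out;
        res += result_offset;                                                     // quantization offset
        return std::min(std::max(res, min_bound), max_bound);                     // clamp before the final cast to QASYMM8/QASYMM8_SIGNED/QSYMM16
    }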
- */ -class CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel : public ICLKernel -{ -public: - /** Constructor */ - CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel(const CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &operator=(const CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel(CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &operator=(CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM16. - * @param[in] info Output stage info. Used to pass the quantized output data type - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor. Data type supported: Data type supported: QSYMM8/QASYMM8_SIGNED/QSYMM16. - * @param[in] info Output stage info. Used to pass the quantized output data type - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_bias; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFIXEDPOINTKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h deleted file mode 100644 index 0b3f23dab3..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H -#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ICLTensor; - -/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED - * - * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value. - * The following computations will be performed by the kernel: - * - * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier - * -# Add bias to final result if bias tensor is not a nullptr - * -# Requantize - * -# Add offset to each result - * -# Clamp the value between the specified min and max bounds - * -# Clamp the resulting int32 values to - * - to the [0..255] range and cast to QASYMM8. - * - to the [-128..127] range and cast to QASYMM8_SIGNED. - */ -class CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel : public ICLKernel -{ -public: - /** Constructor */ - CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel(const CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &operator=(const CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel(CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &operator=(CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] info Output stage info. 
Used to pass the quantized output data type - */ - void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] info Output stage info. Used to pass the quantized output data type - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] info Output stage info. Used to pass the quantized output data type - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_bias; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h deleted file mode 100644 index 1a284f0701..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
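A per-element scalar sketch of the float-scale output stage described above, shown for the QASYMM8 case; the rounding mode, the point at which the bias is added, and the omission of the optional min/max bounds are assumptions, and all names are hypothetical.

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Per-element sketch for QASYMM8; QASYMM8_SIGNED would clamp to [-128, 127] instead.
    uint8_t quantize_down_float(int32_t acc, int32_t bias, float result_multiplier, int32_t result_offset)
    {
        acc += bias;                                                              // bias, when a bias tensor is given
        int32_t res = static_cast<int32_t>(std::lround(static_cast<float>(acc) * result_multiplier)) + result_offset;
        res         = std::min(std::max(res, 0), 255);                            // clamp to the quantized range
        return static_cast<uint8_t>(res);
    }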
- */ -#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H -#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED - * - * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value. - * The following computations will be performed by the kernel: - * - * -# Add offset terms to final result - * -# Multiply each entry of result by result_mult_int - * -# Add bias to final result if bias tensor is not a nullptr - * -# Shift the int32 accumulator by result_shift - * -# Clamp the value between the specified min and max bounds - * -# Clamp the resulting int32 values: - * -# -to the [0..255] range and cast to QASYMM8. - * -# -to the [-128..127] range and cast to QASYMM8_SIGNED. - * - */ -class CLGEMMLowpQuantizeDownInt32ScaleKernel : public ICLKernel -{ -public: - /** Constructor */ - CLGEMMLowpQuantizeDownInt32ScaleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpQuantizeDownInt32ScaleKernel(const CLGEMMLowpQuantizeDownInt32ScaleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpQuantizeDownInt32ScaleKernel &operator=(const CLGEMMLowpQuantizeDownInt32ScaleKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMLowpQuantizeDownInt32ScaleKernel(CLGEMMLowpQuantizeDownInt32ScaleKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMLowpQuantizeDownInt32ScaleKernel &operator=(CLGEMMLowpQuantizeDownInt32ScaleKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] output_stage GEMMLowp output stage metadata. - */ - void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] output_stage GEMMLowp output stage metadata. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleKernel - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. 
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] output_stage GEMMLowp output stage metadata. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_bias; - ICLTensor *_output; -}; -} // namespace arm_compute - -#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H */ \ No newline at end of file diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h deleted file mode 100644 index 6066e2a815..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H -#define ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; -struct GEMMLowpReductionKernelInfo; - -/** Common interface for all OpenCL reduction kernels */ -class ICLGEMMLowpReductionKernel : public ICLKernel -{ -public: - /** Constructor */ - ICLGEMMLowpReductionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - ICLGEMMLowpReductionKernel(const ICLGEMMLowpReductionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - ICLGEMMLowpReductionKernel &operator=(const ICLGEMMLowpReductionKernel &) = delete; - /** Allow instances of this class to be moved */ - ICLGEMMLowpReductionKernel(ICLGEMMLowpReductionKernel &&) = default; - /** Allow instances of this class to be moved */ - ICLGEMMLowpReductionKernel &operator=(ICLGEMMLowpReductionKernel &&) = default; - - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8. - * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. 
Data type supported: S32 - * @param[in] info Kernel metadata: - * - k Number of matrix columns/rows depending on the type of reduction. - * - is_reshaped True if the matrix has been reshaped. - * - scalar Scalar value to multiply each reduced column/row by. - * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. - */ - virtual void configure(const ICLTensor *input, ICLTensor *output, const GEMMLowpReductionKernelInfo &info) = 0; - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8. - * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k Number of matrix columns/rows depending on the type of reduction. - * - is_reshaped True if the matrix has been reshaped. - * - scalar Scalar value to multiply each reduced column/row by. - * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. - */ - virtual void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLowpReductionKernelInfo &info) = 0; - -protected: - const ICLTensor *_input; - ICLTensor *_output; -}; - -/** OpenCL kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A. - * - * @note This stage is needed to handle the offset of matrix product - * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md - */ -class CLGEMMLowpMatrixAReductionKernel : public ICLGEMMLowpReductionKernel -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8. - * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k Number of matrix columns/rows depending on the type of reduction. - * - is_reshaped True if the matrix has been reshaped. - * - scalar Scalar value to multiply each reduced column/row by. - * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. - */ - void configure(const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override; - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8. - * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k Number of matrix columns/rows depending on the type of reduction. - * - is_reshaped True if the matrix has been reshaped. - * - scalar Scalar value to multiply each reduced column/row by. - * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override; - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixAReductionKernel - * - * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8. - * @param[in] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. 
Data type supported: S32 - * @param[in] info Kernel metadata: - * - k Number of matrix columns/rows depending on the type of reduction. - * - is_reshaped True if the matrix has been reshaped. - * - scalar Scalar value to multiply each reduced column/row by. - * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. - * - * @return a status - */ - static Status validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row, const GEMMLowpReductionKernelInfo &info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; -}; - -/** OpenCL kernel used to compute the row-vectors of sums of all the entries in each column of Matrix B. - * - * @note This stage is needed to handle the offset of matrix product - * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md - */ -class CLGEMMLowpMatrixBReductionKernel : public ICLGEMMLowpReductionKernel -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL. - * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k Number of matrix columns/rows depending on the type of reduction. - * - is_reshaped True if the matrix has been reshaped. - * - scalar Scalar value to multiply each reduced column/row by. - * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. - */ - void configure(const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override; - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL. - * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k Number of matrix columns/rows depending on the type of reduction. - * - is_reshaped True if the matrix has been reshaped. - * - scalar Scalar value to multiply each reduced column/row by. - * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override; - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixBReductionKernel - * - * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL. - * @param[in] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k Number of matrix columns/rows depending on the type of reduction. - * - is_reshaped True if the matrix has been reshaped. - * - scalar Scalar value to multiply each reduced column/row by. - * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. 
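The reason these row/column sums are needed can be made concrete with a small host-side sketch of the gemmlowp offset correction (illustrative only, not library API): for quantized inputs with zero points a_offset and b_offset, each S32 accumulator is corrected with the row sum of A and the column sum of B before the output stage.

#include <cstdint>
#include <vector>

// acc[i][j] = sum_k A[i][k] * B[k][j] on the raw quantized values; the correction below
// accounts for the zero points, using the sums produced by the two reduction kernels.
inline int32_t offset_corrected(int32_t acc, int32_t row_sum_a, int32_t col_sum_b,
                                int32_t a_offset, int32_t b_offset, int32_t k)
{
    return acc - b_offset * row_sum_a - a_offset * col_sum_b + k * a_offset * b_offset;
}

// Row sums of a row-major M x K matrix, i.e. conceptually what the Matrix A reduction produces.
inline std::vector<int32_t> row_sums(const std::vector<uint8_t> &a, int m, int k)
{
    std::vector<int32_t> sums(m, 0);
    for(int i = 0; i < m; ++i)
    {
        for(int j = 0; j < k; ++j)
        {
            sums[i] += a[i * k + j];
        }
    }
    return sums;
}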
- * - * @return a status - */ - static Status validate(const ITensorInfo *mtx_b, const ITensorInfo *vector_sum_col, const GEMMLowpReductionKernelInfo &info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h deleted file mode 100644 index 4abd60c202..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H -#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to multiply two input matrices "A" and "B" and add a martix "C" if provided. All elements of the output matrix will be multiplied by alpha. In case matrix C is passed, it will be added to the previous result. - * For the matrix C, the broadcast addition is supported if the flag "broadcast_bias" is set in the GEMMReshapeInfo object - * - * @note If the input tensors @p input0 and @p input1 have been reshaped respectively with @ref CLGEMMReshapeLHSMatrixKernel" and @ref CLGEMMReshapeRHSMatrixKernel, - * the flag @p is_interleaved_transposed must be set to true - * - * @attention @p input1 tensor must have at least 2 dimensions (matrix) - * - */ -class CLGEMMMatrixMultiplyKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLGEMMMatrixMultiplyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixMultiplyKernel(const CLGEMMMatrixMultiplyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixMultiplyKernel &operator=(const CLGEMMMatrixMultiplyKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMMatrixMultiplyKernel(CLGEMMMatrixMultiplyKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMMatrixMultiplyKernel &operator=(CLGEMMMatrixMultiplyKernel &&) = default; - /** Initialise the kernel's input, output and alpha - * - * @param[in] input0 Input tensor containing the Matrix A. 
Data types supported: F16/F32 - * @param[in] input1 Input tensor containing the Matrix B. Data type supported: same as @p input0 - * @param[in] input2 Input tensor containing the Matrix C (bias). Can be nullptr. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta (Optional) Weight of vector C. Default value is 0. Only beta = 1 is currently supported. - * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel - * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped - * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy - * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication - * - */ - void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f, - bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo()); - /** Initialise the kernel's input, output and alpha - * - * @param[in] compile_context The compile context to be used. - * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F16/F32 - * @param[in] input1 Input tensor containing the Matrix B. Data type supported: same as @p input0 - * @param[in] input2 Input tensor containing the Matrix C (bias). Can be nullptr. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta (Optional) Weight of vector C. Default value is 0. Only beta = 1 is currently supported. - * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel - * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped - * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy - * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication - * - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f, - bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyKernel - * - * @param[in] input0 Input tensor containing the Matrix A info. Data types supported: F16/F32 - * @param[in] input1 Input tensor containing the Matrix B info. 
Data type supported: same as @p input0 - * @param[in] input2 Input tensor containing the Matrix C (bias) info. Can be nullptr. Data type supported: same as @p input0 - * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of vector C. Default value is 0. Only beta = 1 is currently supported. - * @param[in] is_interleaved_transposed True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel - * @param[in] reshape_info GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped - * @param[in] gpu_target GPU Target - * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy - * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, - bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info, GPUTarget gpu_target, bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo()); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -public: - const ICLTensor *_input0; - const ICLTensor *_input1; - const ICLTensor *_input2; - ICLTensor *_output; - bool _slide_matrix_b; - bool _reinterpret_input_as_3d; - bool _reinterpret_output_as_3d; - bool _add_bias; - bool _broadcast_bias; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h deleted file mode 100644 index 006b2bf91f..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
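As a reference for what the matrix multiply kernel above computes, a naive host-side equivalent of output = alpha * (A * B) + beta * C is sketched below (illustrative only; it ignores the reshaped/interleaved layouts, broadcast bias, fp_mixed_precision and the fused activation):

#include <vector>

// Naive reference GEMM for row-major A (MxK), B (KxN), optional bias C and output (MxN).
// When no bias tensor is passed the beta term is simply skipped.
void gemm_reference(const std::vector<float> &a, const std::vector<float> &b, const std::vector<float> *c,
                    std::vector<float> &out, int m, int n, int k, float alpha, float beta)
{
    for(int i = 0; i < m; ++i)
    {
        for(int j = 0; j < n; ++j)
        {
            float acc = 0.f;
            for(int p = 0; p < k; ++p)
            {
                acc += a[i * k + p] * b[p * n + j];
            }
            out[i * n + j] = alpha * acc + ((c != nullptr) ? beta * (*c)[i * n + j] : 0.f);
        }
    }
}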
- */ -#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H -#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include "arm_compute/core/KernelDescriptors.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to multiply matrices when neither of the input matrices have been reshaped */ -class CLGEMMMatrixMultiplyNativeKernel : public ICLKernel -{ -public: - /** Default Constructor */ - CLGEMMMatrixMultiplyNativeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixMultiplyNativeKernel(const CLGEMMMatrixMultiplyNativeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixMultiplyNativeKernel &operator=(const CLGEMMMatrixMultiplyNativeKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMMatrixMultiplyNativeKernel(CLGEMMMatrixMultiplyNativeKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMMatrixMultiplyNativeKernel &operator=(CLGEMMMatrixMultiplyNativeKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input0 Input tensor for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4. - * @param[in] input1 Input tensor for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. - * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. - * @param[out] output Output tensor info. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of the matrix bias - * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. Only the following values are supported: - * lhs_info.m0: 1,2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: same of lhs_info.k0 - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - */ - void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, - const GEMMKernelInfo &gemm_info); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input0 Input tensor for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4. - * @param[in] input1 Input tensor for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. - * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. - * @param[out] output Output tensor info. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of the matrix bias - * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. 
Only the following values are supported: - * lhs_info.m0: 1,2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: same of lhs_info.k0 - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, - const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, - const GEMMKernelInfo &gemm_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyNativeKernel - * - * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4. - * @param[in] input1 Input tensor info for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. - * @param[in] input2 Input tensor info containing the bias matrix. Data type supported: same as @p input0. - * @param[in] output Output tensor info. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of the matrix bias - * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. Only the following values are supported: - * lhs_info.m0: 1,2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: same of lhs_info.k0 - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, - const GEMMKernelInfo &gemm_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input0; - const ICLTensor *_input1; - const ICLTensor *_input2; - ICLTensor *_output; - bool _slide_matrix_b; - bool _reinterpret_input_as_3d; - bool _reinterpret_output_as_3d; - bool _use_dummy_work_items; - bool _add_bias; - bool _broadcast_bias; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H*/ diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h deleted file mode 100644 index 962645749e..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. 
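The m0, n0 and k0 values accepted by the native matrix multiply kernel above describe how many output rows, output columns and accumulations each work item handles; conceptually the kernel is a block-tiled version of the same GEMM, along the lines of this host-side sketch (illustrative only):

#include <algorithm>
#include <vector>

// Block-tiled GEMM: each (m0 x n0) output tile is accumulated k0 values at a time,
// mirroring the meaning of lhs_info.m0, rhs_info.n0 and k0. Edge tiles are clamped with std::min.
void tiled_gemm(const std::vector<float> &a, const std::vector<float> &b, std::vector<float> &out,
                int m, int n, int k, int m0, int n0, int k0)
{
    for(int i0 = 0; i0 < m; i0 += m0)
    {
        for(int j0 = 0; j0 < n; j0 += n0)
        {
            for(int i = i0; i < std::min(i0 + m0, m); ++i)
            {
                for(int j = j0; j < std::min(j0 + n0, n); ++j)
                {
                    float acc = 0.f;
                    for(int p0 = 0; p0 < k; p0 += k0)
                    {
                        for(int p = p0; p < std::min(p0 + k0, k); ++p)
                        {
                            acc += a[i * k + p] * b[p * n + j];
                        }
                    }
                    out[i * n + j] = acc;
                }
            }
        }
    }
}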
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H -#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include "arm_compute/core/KernelDescriptors.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to multiply matrices when both the input matrices LHS (input0) and RHS (input1) have been reshaped - * - * @note The input matrices @p input0 and @p input1 must be reshaped through @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel - */ -class CLGEMMMatrixMultiplyReshapedKernel : public ICLKernel -{ -public: - /** Default Constructor */ - CLGEMMMatrixMultiplyReshapedKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixMultiplyReshapedKernel(const CLGEMMMatrixMultiplyReshapedKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixMultiplyReshapedKernel &operator=(const CLGEMMMatrixMultiplyReshapedKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMMatrixMultiplyReshapedKernel(CLGEMMMatrixMultiplyReshapedKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMMatrixMultiplyReshapedKernel &operator=(CLGEMMMatrixMultiplyReshapedKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag. - * Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the - * multiplications. i.e. float c = (half)a * (half)b - * - * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function. - * Reading from the OpenCL image object can increase the performance. 
However, since the OpenCL image object is created importing the OpenCL buffer, - * the following conditions are required: - * -# rhs_info.n0 can only be 4, 8 and 16 - * -# rhs_info.k0 can only be 4, 8 and 16 - * -# Data type can only be F32 - * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension - * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement - * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) - * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT - * - * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). The number of dimensions for the LHS matrix must be less or equal than 4 - * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3 - * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of the matrix bias - * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * lhs_info.transpose: false - * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true) - * rhs_info.k0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true) - * rhs_info.transpose: true - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - * - * @note lhs_info.k0 must be equal to rhs_info.k0 - */ - void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, - const GEMMKernelInfo &gemm_info); - /** Initialise the kernel's input and output. - * - * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag. - * Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the - * multiplications. i.e. float c = (half)a * (half)b - * - * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function. - * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer, - * the following conditions are required: - * -# rhs_info.n0 can only be 4, 8 and 16 - * -# rhs_info.k0 can only be 4, 8 and 16 - * -# Data type can only be F32 - * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension - * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement - * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) - * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT - * - * @param[in] compile_context The compile context to be used. - * @param[in] input0 Input tensor containing the LHS reshaped matrix. 
Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). The number of dimensions for the LHS matrix must be less or equal than 4 - * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3 - * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of the matrix bias - * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * lhs_info.transpose: false - * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true) - * rhs_info.k0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true) - * rhs_info.transpose: true - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - * - * @note lhs_info.k0 must be equal to rhs_info.k0 - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, - const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, - const GEMMKernelInfo &gemm_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedKernel - * - * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag. - * Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the - * multiplications. i.e. float c = (half)a * (half)b - * - * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function. - * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer, - * the following conditions are required: - * -# rhs_info.n0 can only be 4, 8 and 16 - * -# rhs_info.k0 can only be 4, 8 and 16 - * -# Data type can only be F32 - * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension - * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement - * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) - * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT - * - * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). The number of dimensions for the LHS matrix must be less or equal than 4 - * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3 - * @param[in] input2 Input tensor info containing the bias matrix. Data type supported: same as @p input0. - * @param[in] output Output tensor to store the result of matrix multiplication. 
Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of the matrix bias - * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * lhs_info.transpose: false - * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true) - * rhs_info.k0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true) - * rhs_info.transpose: true - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - * - * @note lhs_info.k0 must be equal to rhs_info.k0 - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, - const GEMMKernelInfo &gemm_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input0; - const ICLTensor *_input1; - const ICLTensor *_input2; - ICLTensor *_output; - bool _slide_matrix_b; - bool _reinterpret_output_as_3d; - bool _use_dummy_work_items; - bool _add_bias; - bool _broadcast_bias; - bool _export_to_cl_image; - unsigned int _k; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H*/ \ No newline at end of file diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h deleted file mode 100644 index eab7fd219e..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
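Several of the export_to_cl_image prerequisites listed above can be checked at run time by querying the OpenCL device; a minimal sketch follows (assuming a valid cl_device_id and omitting error handling; the n0/k0 restrictions, the F32-only requirement and the stride Y pitch alignment still have to be verified separately):

#include <CL/cl.h>
#include <string>

// Checks the cl_khr_image2d_from_buffer extension and the image size limits referenced above.
// rhs_width and rhs_height_x_depth are the dimensions of the RHS (input1) tensor to be imported.
bool can_export_rhs_to_cl_image(cl_device_id device, size_t rhs_width, size_t rhs_height_x_depth)
{
    size_t ext_size = 0;
    clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, nullptr, &ext_size);
    std::string extensions(ext_size, '\0');
    clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, ext_size, &extensions[0], nullptr);

    size_t max_image_width = 0, max_image_height = 0;
    clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(max_image_width), &max_image_width, nullptr);
    clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(max_image_height), &max_image_height, nullptr);

    // The width limit is multiplied by 4 because four F32 values are packed per RGBA texel.
    return extensions.find("cl_khr_image2d_from_buffer") != std::string::npos
           && rhs_width <= max_image_width * 4
           && rhs_height_x_depth <= max_image_height;
}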
- */ -#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H -#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include "arm_compute/core/KernelDescriptors.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to multiply matrices when only the input matrix RHS (input1) has been reshaped - * - * @note The input matrix input1 must be reshaped through @ref CLGEMMReshapeRHSMatrixKernel - */ -class CLGEMMMatrixMultiplyReshapedOnlyRHSKernel : public ICLKernel -{ -public: - /** Default Constructor */ - CLGEMMMatrixMultiplyReshapedOnlyRHSKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixMultiplyReshapedOnlyRHSKernel(const CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &operator=(const CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMMatrixMultiplyReshapedOnlyRHSKernel(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &operator=(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function. - * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer, - * the following conditions are required: - * -# rhs_info.n0 can only be 4, 8 and 16 - * -# rhs_info.k0 can only be 4, 8 and 16 - * -# Data type can only be F32 - * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension - * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement - * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) - * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT - * - * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). - * The number of dimensions for the LHS matrix must be less or equal than 4. - * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. - * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of the matrix bias - * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported: - * lhs_info.m0: 1,2,3,4,5,6,7,8 - * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. 
Only the following values are supported: - * rhs_info.k0: 2,3,4,8,16 - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.transpose: true,false - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - */ - void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, - const GEMMKernelInfo &gemm_info); - /** Initialise the kernel's input and output. - * - * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function. - * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer, - * the following conditions are required: - * -# rhs_info.n0 can only be 4, 8 and 16 - * -# rhs_info.k0 can only be 4, 8 and 16 - * -# Data type can only be F32 - * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension - * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement - * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) - * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT - * - * @param[in] compile_context The compile context to be used. - * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). - * The number of dimensions for the LHS matrix must be less or equal than 4. - * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. - * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of the matrix bias - * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported: - * lhs_info.m0: 1,2,3,4,5,6,7,8 - * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: - * rhs_info.k0: 2,3,4,8,16 - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.transpose: true,false - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, - const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, - const GEMMKernelInfo &gemm_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel - * - * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function. - * Reading from the OpenCL image object can increase the performance. 
However, since the OpenCL image object is created importing the OpenCL buffer, - * the following conditions are required: - * -# rhs_info.n0 can only be 4, 8 and 16 - * -# rhs_info.k0 can only be 4, 8 and 16 - * -# Data type can only be F32 - * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension - * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement - * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) - * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT - * - * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). - * The number of dimensions for the LHS matrix must be less or equal than 4. - * @param[in] input1 Input tensor info for the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. - * @param[in] input2 Input tensor info containing the bias matrix. Data type supported: same as @p input0. - * @param[in] output Output tensor info. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of the matrix bias - * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported: - * lhs_info.m0: 1,2,3,4,5,6,7,8 - * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: - * rhs_info.k0: 2,3,4,8,16 - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.transpose: true,false - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, - const GEMMKernelInfo &gemm_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input0; - const ICLTensor *_input1; - const ICLTensor *_input2; - ICLTensor *_output; - bool _slide_matrix_b; - bool _reinterpret_input_as_3d; - bool _reinterpret_output_as_3d; - bool _use_dummy_work_items; - bool _add_bias; - bool _broadcast_bias; - bool _export_to_cl_image; - bool _has_pad_y; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H*/ diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h deleted file mode 100644 index 95ed87d95b..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H -#define ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the GEMM matrix vector multiply kernel. **/ -class CLGEMMMatrixVectorMultiplyKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLGEMMMatrixVectorMultiplyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixVectorMultiplyKernel(const CLGEMMMatrixVectorMultiplyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixVectorMultiplyKernel &operator=(const CLGEMMMatrixVectorMultiplyKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMMatrixVectorMultiplyKernel(CLGEMMMatrixVectorMultiplyKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMMatrixVectorMultiplyKernel &operator=(CLGEMMMatrixVectorMultiplyKernel &&) = default; - /** Set the input and output of the kernel. - * - * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input. - * @param[out] output The output 2D tensor. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED. - */ - void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); - /** Set the input and output of the kernel. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input. - * @param[out] output The output 2D tensor. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixVectorMultiplyKernel - * - * @param[in] input0 The reshaped input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] input1 The 2D reshaped weights tensor info. Data type supported: Same as @p input. 
- * @param[in] output The output 2D tensor info. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED. - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input0; - const ICLTensor *_input1; - ICLTensor *_output; - int _num_rows_read_per_iteration; - BorderSize _border_size; -}; -} // arm_compute -#endif /*ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h b/arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h deleted file mode 100644 index 0f74cb85e4..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H -#define ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to reshape the LHS matrix when performing the matrix multiplication. - * In particular, this function splits the input matrix in blocks of size M0xK0 (defined through GEMMLHSInfo) and - * stores each one in the output matrix unrolling the values - */ -class CLGEMMReshapeLHSMatrixKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLGEMMReshapeLHSMatrixKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMReshapeLHSMatrixKernel(const CLGEMMReshapeLHSMatrixKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMReshapeLHSMatrixKernel &operator=(const CLGEMMReshapeLHSMatrixKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMReshapeLHSMatrixKernel(CLGEMMReshapeLHSMatrixKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMReshapeLHSMatrixKernel &operator=(CLGEMMReshapeLHSMatrixKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. 
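For reference, a minimal wrapper around the CLGEMMMatrixVectorMultiplyKernel interface removed above. It is a sketch only: the include path is the pre-move one deleted by this patch, the tensors are assumed to have been reshaped by the caller, and the CL backend is assumed to be initialised via CLScheduler:

    #include "arm_compute/core/CL/ICLTensor.h"
    #include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h" // pre-move path
    #include "arm_compute/runtime/CL/CLScheduler.h"

    using namespace arm_compute;

    // Validate first so shape/type mismatches surface as a Status instead of a kernel error,
    // then configure and enqueue on already-reshaped inputs.
    Status run_gemm_matrix_vector(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output)
    {
        Status status = CLGEMMMatrixVectorMultiplyKernel::validate(input0->info(), input1->info(), output->info());
        if(!status)
        {
            return status;
        }
        CLGEMMMatrixVectorMultiplyKernel mv;
        mv.configure(input0, input1, output);
        CLScheduler::get().enqueue(mv);
        CLScheduler::get().sync();
        return status;
    }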
Data type supported: same as @p input - * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary - * information to reshape the input tensor. Only the following values are supported: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * lhs_info.v0: greater than 0 - * lhs_info.transpose: true, false - * lhs_info.interleave: true, false - * @param[in] reinterpret_input_as_3d (Optional) True if the input has to be reinterpreted as 3D tensor - */ - void configure(const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. Data type supported: same as @p input - * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary - * information to reshape the input tensor. Only the following values are supported: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * lhs_info.v0: greater than 0 - * lhs_info.transpose: true, false - * lhs_info.interleave: true, false - * @param[in] reinterpret_input_as_3d (Optional) True if the input has to be reinterpreted as 3D tensor - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMReshapeLHSMatrixKernel - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input. - * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary - * information to reshape the input tensor. Only the following values are supported: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * lhs_info.v0: greater than 0 - * lhs_info.transpose: true, false - * lhs_info.interleave: true, false - * @param[in] reinterpret_input_as_3d True if the input has to be reinterpreted as 3D tensor - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d); - - // Inherited methods overridden - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - bool _reinterpret_input_as_3d; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H */ \ No newline at end of file diff --git a/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h b/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h deleted file mode 100644 index 557f71b07d..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. 
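A minimal configure/enqueue sketch for the CLGEMMReshapeLHSMatrixKernel removed above, assuming the pre-move include path and that the kernel auto-initialises an empty output from lhs_info, as the reshape kernels in this family do; shapes and block sizes are illustrative only:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" // pre-move path
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        // Reshape a 64x64 F32 LHS matrix in 4x4 blocks (m0 = 4, k0 = 4), no transpose/interleave.
        GEMMLHSMatrixInfo lhs_info{};
        lhs_info.m0         = 4;
        lhs_info.k0         = 4;
        lhs_info.v0         = 1;
        lhs_info.transpose  = false;
        lhs_info.interleave = false;

        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::F32));

        CLGEMMReshapeLHSMatrixKernel reshape_lhs;
        reshape_lhs.configure(&src, &dst, lhs_info, /*reinterpret_input_as_3d=*/false);

        src.allocator()->allocate();
        dst.allocator()->allocate(); // allocate after configure so the inferred output shape is used
        CLScheduler::get().enqueue(reshape_lhs);
        CLScheduler::get().sync();
        return 0;
    }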
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H -#define ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to reshape the RHS matrix when performing the matrix multiplication - * In particular, this kernel splits the input matrix in blocks of size K0xN0 and stores each one in - * the output matrix unrolling the values */ -class CLGEMMReshapeRHSMatrixKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLGEMMReshapeRHSMatrixKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMReshapeRHSMatrixKernel(const CLGEMMReshapeRHSMatrixKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMReshapeRHSMatrixKernel &operator=(const CLGEMMReshapeRHSMatrixKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMReshapeRHSMatrixKernel(CLGEMMReshapeRHSMatrixKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMReshapeRHSMatrixKernel &operator=(CLGEMMReshapeRHSMatrixKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will guarantee the OpenCL pitch alignment for the output tensor, - * required to create a OpenCL image object from buffer in @ref CLGEMMMatrixMultiplyReshapedKernel and in @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel - * Since the OpenCL image object is created importing the OpenCL buffer, the following conditions are required: - * -# rhs_info.n0 can only be 4, 8 and 16 - * -# rhs_info.k0 can only be 4, 8 and 16 - * -# Data type can only be F32, F16 - * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension - * -# output width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) - * -# output (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT - * -# The output tensor should be only consumed by @ref CLGEMMMatrixMultiplyReshapedKernel or @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel - * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. Data type supported: same as @p input - * @param[in] rhs_info RHS matrix information to be used for reshaping. 
This object contains all the necessary - * information to reshape the input tensor. Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image == true) - * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false), (only 4, 8 and 16 if rhs_info.export_to_cl_image == true) - * rhs_info.h0: greater than 0 - * rhs_info.transpose: true, false - * rhs_info.interleave: true, false - */ - void configure(const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info); - /** Initialise the kernel's input and output. - * - * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will guarantee the OpenCL pitch alignment for the output tensor, - * required to create a OpenCL image object from buffer in @ref CLGEMMMatrixMultiplyReshapedKernel and in @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel - * Since the OpenCL image object is created importing the OpenCL buffer, the following conditions are required: - * -# rhs_info.n0 can only be 4, 8 and 16 - * -# rhs_info.k0 can only be 4, 8 and 16 - * -# Data type can only be F32, F16 - * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension - * -# output width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) - * -# output (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT - * -# The output tensor should be only consumed by @ref CLGEMMMatrixMultiplyReshapedKernel or @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. Data type supported: same as @p input - * @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary - * information to reshape the input tensor. Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image == true) - * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false), (only 4, 8 and 16 if rhs_info.export_to_cl_image == true) - * rhs_info.h0: greater than 0 - * rhs_info.transpose: true, false - * rhs_info.interleave: true, false - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMReshapeRHSMatrixKernel - * - * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will guarantee the OpenCL pitch alignment for the output tensor, - * required to create a OpenCL image object from buffer in @ref CLGEMMMatrixMultiplyReshapedKernel and in @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel - * Since the OpenCL image object is created importing the OpenCL buffer, the following conditions are required: - * -# rhs_info.n0 can only be 4, 8 and 16 - * -# rhs_info.k0 can only be 4, 8 and 16 - * -# Data type can only be F32, F16 - * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension - * -# output width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) - * -# output (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT - * -# The output tensor should be only consumed by @ref CLGEMMMatrixMultiplyReshapedKernel or @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel - * - * @param[in] input Input tensor info. 
Data types supported: All - * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input. - * @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary - * information to reshape the input tensor. Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image == true) - * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false),(only 4, 8 and 16 if rhs_info.export_to_cl_image == true) - * rhs_info.h0: greater than 0 - * rhs_info.transpose: true, false - * rhs_info.interleave: true, false - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const GEMMRHSMatrixInfo &rhs_info); - - // Inherited methods overridden - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H */ \ No newline at end of file diff --git a/arm_compute/core/CL/kernels/CLGatherKernel.h b/arm_compute/core/CL/kernels/CLGatherKernel.h deleted file mode 100644 index c8a96327b6..0000000000 --- a/arm_compute/core/CL/kernels/CLGatherKernel.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGATHERKERNEL_H -#define ARM_COMPUTE_CLGATHERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to perform tensor reshaping */ -class CLGatherKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLGatherKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGatherKernel(const CLGatherKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGatherKernel &operator=(const CLGatherKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGatherKernel(CLGatherKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGatherKernel &operator=(CLGatherKernel &&) = default; - /** Default destructor */ - ~CLGatherKernel() = default; - /** Initialise the kernel's inputs and outputs - * - * @param[in] input Source tensor. 
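A matching sketch for the CLGEMMReshapeRHSMatrixKernel removed above, under the same assumptions (pre-move include path, output auto-initialised from rhs_info, CL backend initialised); the block sizes are illustrative and export_to_cl_image is left at its default of false:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" // pre-move path
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        // Reshape a 64x64 F32 RHS matrix in transposed 4x4 blocks (n0 = 4, k0 = 4, h0 = 1).
        GEMMRHSMatrixInfo rhs_info{};
        rhs_info.n0         = 4;
        rhs_info.k0         = 4;
        rhs_info.h0         = 1;
        rhs_info.transpose  = true;
        rhs_info.interleave = false;

        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::F32));

        CLGEMMReshapeRHSMatrixKernel reshape_rhs;
        reshape_rhs.configure(&src, &dst, rhs_info);

        src.allocator()->allocate();
        dst.allocator()->allocate(); // allocate after configure so the inferred output shape is used
        CLScheduler::get().enqueue(reshape_rhs);
        CLScheduler::get().sync();
        return 0;
    }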
Supported tensor rank: up to 4. Data type supported: All. - * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]) - * @param[out] output Destination tensor. Data type supported: Same as @p input - * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 - */ - void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0); - /** Initialise the kernel's inputs and outputs - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All. - * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]) - * @param[out] output Destination tensor. Data type supported: Same as @p input - * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0); - - /** Static function to check if given info will lead to a valid configuration of @ref CLGatherKernel - * - * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All. - * @param[in] indices Indices tensor info. Supported tensor rank: up to 4. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]) - * @param[in] output Destination tensor info. Data type supported: Same as @p input - * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis = 0); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - const ICLTensor *_indices; /**< Indices tensor */ - ICLTensor *_output; /**< Destination tensor */ - int _axis; /**< Axis index */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGATHERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h b/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h deleted file mode 100644 index a783527de4..0000000000 --- a/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
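A minimal end-to-end sketch of the CLGatherKernel interface removed above, assuming the pre-move include path; the 16x4 source, the 8 U32 indices and axis 0 are illustrative values:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/CL/kernels/CLGatherKernel.h" // pre-move path
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        // Gather 8 of the 16 entries along axis 0 of a 16x4 F32 tensor -> 8x4 output.
        CLTensor src, indices, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::F32));
        indices.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::U32));
        dst.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));

        CLGatherKernel gather;
        gather.configure(&src, &indices, &dst, /*axis=*/0);

        src.allocator()->allocate();
        indices.allocator()->allocate();
        dst.allocator()->allocate();
        CLScheduler::get().enqueue(gather);
        CLScheduler::get().sync();
        return 0;
    }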
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H -#define ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the Gaussian 3x3 filter kernel. - * - */ -class CLGaussian3x3Kernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h b/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h deleted file mode 100644 index e8c2268e26..0000000000 --- a/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H -#define ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H - -#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run the horizontal pass of 5x5 Gaussian filter on a tensor. */ -class CLGaussian5x5HorKernel : public CLSeparableConvolution5x5HorKernel -{ -public: - /** Initialise the kernel's source, destination and border. - * - * @param[in] input Source tensor. 
Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - -private: - //Make the configure method of the parent class private - using CLSeparableConvolution5x5HorKernel::configure; -}; - -/** Interface for the kernel to run the vertical pass of 5x5 Gaussian filter on a tensor. */ -class CLGaussian5x5VertKernel : public CLSeparableConvolution5x5VertKernel -{ -public: - /** Initialise the kernel's source, destination and border. - * - * @param[in] input Input tensor(output of horizontal pass). Data types supported: S16. - * @param[out] output Destination tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor(output of horizontal pass). Data types supported: S16. - * @param[out] output Destination tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - -private: - //Make the configure method of the parent class private - using CLSeparableConvolution5x5VertKernel::configure; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h b/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h deleted file mode 100644 index 36e095d4d1..0000000000 --- a/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
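A two-pass sketch for the CLGaussian5x5HorKernel / CLGaussian5x5VertKernel pair removed above; the same configure-then-allocate pattern applies to the single-pass CLGaussian3x3Kernel further up. It assumes the pre-move include path, an illustrative 640x480 image and no border filling (border_undefined = true):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h" // pre-move path
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        // U8 source -> S16 intermediate (horizontal pass) -> U8 destination (vertical pass).
        CLTensor src, tmp, dst;
        src.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));
        tmp.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::S16));
        dst.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));

        CLGaussian5x5HorKernel  hor;
        CLGaussian5x5VertKernel vert;
        hor.configure(&src, &tmp, /*border_undefined=*/true);  // border pixels left undefined in this sketch
        vert.configure(&tmp, &dst, /*border_undefined=*/true);

        src.allocator()->allocate(); // allocate after configure so the kernels can extend padding
        tmp.allocator()->allocate();
        dst.allocator()->allocate();
        CLScheduler::get().enqueue(hor);
        CLScheduler::get().enqueue(vert);
        CLScheduler::get().sync();
        return 0;
    }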
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H -#define ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H - -#include "arm_compute/core/CL/ICLSimpleKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform a Gaussian filter and half scaling across width (horizontal pass) */ -class CLGaussianPyramidHorKernel : public ICLSimpleKernel -{ -public: - /** Default constructor */ - CLGaussianPyramidHorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGaussianPyramidHorKernel(const CLGaussianPyramidHorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGaussianPyramidHorKernel &operator=(const CLGaussianPyramidHorKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGaussianPyramidHorKernel(CLGaussianPyramidHorKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGaussianPyramidHorKernel &operator=(CLGaussianPyramidHorKernel &&) = default; - /** Default destructor */ - ~CLGaussianPyramidHorKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16. - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - int _l2_load_offset; -}; - -/** OpenCL kernel to perform a Gaussian filter and half scaling across height (vertical pass) */ -class CLGaussianPyramidVertKernel : public ICLSimpleKernel -{ -public: - /** Default constructor */ - CLGaussianPyramidVertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGaussianPyramidVertKernel(const CLGaussianPyramidVertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGaussianPyramidVertKernel &operator=(const CLGaussianPyramidVertKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGaussianPyramidVertKernel(CLGaussianPyramidVertKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGaussianPyramidVertKernel &operator=(CLGaussianPyramidVertKernel &&) = default; - /** Default destructor */ - ~CLGaussianPyramidVertKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data types supported: U16. - * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8. 
- */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U16. - * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - int _t2_load_offset; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h b/arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h deleted file mode 100644 index 9dfe4a42ce..0000000000 --- a/arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGENERATEPROPOSALSLAYERKERNEL_H -#define ARM_COMPUTE_CLGENERATEPROPOSALSLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -namespace arm_compute -{ -class ICLTensor; - -/** Interface for Compute All Anchors kernel */ -class CLComputeAllAnchorsKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLComputeAllAnchorsKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLComputeAllAnchorsKernel(const CLComputeAllAnchorsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLComputeAllAnchorsKernel &operator=(const CLComputeAllAnchorsKernel &) = delete; - /** Allow instances of this class to be moved */ - CLComputeAllAnchorsKernel(CLComputeAllAnchorsKernel &&) = default; - /** Allow instances of this class to be moved */ - CLComputeAllAnchorsKernel &operator=(CLComputeAllAnchorsKernel &&) = default; - /** Default destructor */ - ~CLComputeAllAnchorsKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 - * @param[out] all_anchors Destination tensor. 
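A single-level sketch for the Gaussian pyramid kernel pair removed above, assuming the pre-move include path; the intermediate tensor halves the width (U16), the second pass halves the height back to U8, and border handling is omitted:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h" // pre-move path
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        // One pyramid level: 64x64 U8 -> 32x64 U16 (half width) -> 32x32 U8 (half height).
        CLTensor level0, half_width, level1;
        level0.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));
        half_width.allocator()->init(TensorInfo(TensorShape(32U, 64U), 1, DataType::U16));
        level1.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::U8));

        CLGaussianPyramidHorKernel  hor;
        CLGaussianPyramidVertKernel vert;
        hor.configure(&level0, &half_width);
        vert.configure(&half_width, &level1);

        level0.allocator()->allocate();
        half_width.allocator()->allocate();
        level1.allocator()->allocate();
        CLScheduler::get().enqueue(hor);
        CLScheduler::get().enqueue(vert);
        CLScheduler::get().sync();
        return 0;
    }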
Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input - * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo - * - */ - void configure(const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 - * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input - * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo - * - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref CLComputeAllAnchorsKernel - * - * @param[in] anchors Source tensor info. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 - * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input - * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo - * - * @return a Status - */ - static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_anchors; - ICLTensor *_all_anchors; -}; -} // arm_compute -#endif // ARM_COMPUTE_CLGENERATEPROSPOSALSLAYERKERNEL_H diff --git a/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h b/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h deleted file mode 100644 index c001aa2c9f..0000000000 --- a/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
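A minimal sketch of the CLComputeAllAnchorsKernel interface removed above. The pre-move include path and the ComputeAnchorsInfo(feature_width, feature_height, spatial_scale) constructor are assumptions based on the contemporaneous Types.h; the anchor count and feature-map size are illustrative:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h" // pre-move path
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        // 9 base anchors over a 50x38 feature map at 1/16 spatial scale (illustrative values).
        const unsigned int num_anchors = 9;
        const unsigned int feat_w = 50, feat_h = 38;

        CLTensor anchors, all_anchors;
        anchors.allocator()->init(TensorInfo(TensorShape(4U, num_anchors), 1, DataType::F32));
        all_anchors.allocator()->init(TensorInfo(TensorShape(4U, feat_w * feat_h * num_anchors), 1, DataType::F32));

        // ComputeAnchorsInfo(feat_width, feat_height, spatial_scale); treat the constructor order as assumed.
        const ComputeAnchorsInfo info(static_cast<float>(feat_w), static_cast<float>(feat_h), 1.0f / 16.0f);

        CLComputeAllAnchorsKernel all_anchors_kernel;
        all_anchors_kernel.configure(&anchors, &all_anchors, info);

        anchors.allocator()->allocate();
        all_anchors.allocator()->allocate();
        CLScheduler::get().enqueue(all_anchors_kernel);
        CLScheduler::get().sync();
        return 0;
    }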
- */ -#ifndef ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H -#define ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/IHOG.h" -#include "arm_compute/core/Size2D.h" - -namespace arm_compute -{ -class ITensor; - -/** OpenCL kernel to perform HOG Orientation Binning */ -class CLHOGOrientationBinningKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLHOGOrientationBinningKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHOGOrientationBinningKernel(const CLHOGOrientationBinningKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHOGOrientationBinningKernel &operator=(const CLHOGOrientationBinningKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHOGOrientationBinningKernel(CLHOGOrientationBinningKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHOGOrientationBinningKernel &operator=(CLHOGOrientationBinningKernel &&) = default; - /** Default destructor */ - ~CLHOGOrientationBinningKernel() = default; - - /** Initialise the kernel's inputs, output and HOG's metadata - * - * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16. - * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8 - * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. Number of channels supported: equal to the number of histogram bins per cell - * @param[in] hog_info HOG's metadata - */ - void configure(const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info); - /** Initialise the kernel's inputs, output and HOG's metadata - * - * @param[in] compile_context The compile context to be used. - * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16. - * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8 - * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. 
Number of channels supported: equal to the number of histogram bins per cell - * @param[in] hog_info HOG's metadata - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input_magnitude; - const ICLTensor *_input_phase; - ICLTensor *_output; - Size2D _cell_size; -}; - -/** OpenCL kernel to perform HOG block normalization */ -class CLHOGBlockNormalizationKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLHOGBlockNormalizationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHOGBlockNormalizationKernel(const CLHOGBlockNormalizationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHOGBlockNormalizationKernel &operator=(const CLHOGBlockNormalizationKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHOGBlockNormalizationKernel(CLHOGBlockNormalizationKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHOGBlockNormalizationKernel &operator=(CLHOGBlockNormalizationKernel &&) = default; - /** Default destructor */ - ~CLHOGBlockNormalizationKernel() = default; - - /** Initialise the kernel's input, output and HOG's metadata - * - * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell - * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block - * @param[in] hog_info HOG's metadata - */ - void configure(const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info); - /** Initialise the kernel's input, output and HOG's metadata - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell - * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block - * @param[in] hog_info HOG's metadata - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - Size2D _num_cells_per_block_stride; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h b/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h deleted file mode 100644 index dc9bba8f20..0000000000 --- a/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. 
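A wiring sketch for the two HOG descriptor kernels removed above, parameterised over tensors and a HOGInfo that the caller has already set up (magnitude S16, phase U8, per-cell and per-block F32 outputs with one channel per bin), since constructing those objects is outside the scope of this header; the include path is the pre-move one:

    #include "arm_compute/core/HOGInfo.h"
    #include "arm_compute/core/CL/ICLTensor.h"
    #include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h" // pre-move path
    #include "arm_compute/runtime/CL/CLScheduler.h"

    using namespace arm_compute;

    // Orientation binning followed by block normalisation; hog_info carries the cell/block
    // geometry shared by both kernels.
    void run_hog_descriptor(const ICLTensor *mag, const ICLTensor *phase,
                            ICLTensor *cells, ICLTensor *blocks, const HOGInfo &hog_info)
    {
        CLHOGOrientationBinningKernel binning;
        CLHOGBlockNormalizationKernel block_norm;
        binning.configure(mag, phase, cells, &hog_info);
        block_norm.configure(cells, blocks, &hog_info);

        CLScheduler::get().enqueue(binning);
        CLScheduler::get().enqueue(block_norm);
        CLScheduler::get().sync();
    }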
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLHOGDETECTORKERNEL_H -#define ARM_COMPUTE_CLHOGDETECTORKERNEL_H - -#include "arm_compute/core/CL/ICLArray.h" -#include "arm_compute/core/CL/ICLHOG.h" -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/CL/OpenCL.h" - -namespace cl -{ -class Buffer; -} - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform HOG detector kernel using linear SVM */ -class CLHOGDetectorKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLHOGDetectorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHOGDetectorKernel(const CLHOGDetectorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHOGDetectorKernel &operator=(const CLHOGDetectorKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHOGDetectorKernel(CLHOGDetectorKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHOGDetectorKernel &operator=(CLHOGDetectorKernel &&) = default; - /** Default destructor */ - ~CLHOGDetectorKernel() = default; - - /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect - * - * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block - * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel - * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects - * @param[in] num_detection_windows Number of detected objects - * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. 
- * It must be multiple of the hog->info()->block_stride() - * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane - * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to - */ - void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, - uint16_t idx_class = 0); - /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block - * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel - * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects - * @param[in] num_detection_windows Number of detected objects - * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. - * It must be multiple of the hog->info()->block_stride() - * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane - * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, - const Size2D &detection_window_stride, float threshold = 0.0f, - uint16_t idx_class = 0); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue); - -private: - const ICLTensor *_input; - ICLDetectionWindowArray *_detection_windows; - cl::Buffer *_num_detection_windows; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLHOGDETECTORKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h b/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h deleted file mode 100644 index 38a2f04adf..0000000000 --- a/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
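A wiring sketch for the CLHOGDetectorKernel removed above, again parameterised over caller-owned objects (the trained ICLHOG model, the detection-window array and a cl::Buffer holding the window count), whose construction is outside the scope of this header; the include path is the pre-move one:

    #include "arm_compute/core/Size2D.h"
    #include "arm_compute/core/CL/ICLTensor.h"
    #include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h" // pre-move path
    #include "arm_compute/runtime/CL/CLScheduler.h"

    using namespace arm_compute;

    // Runs the linear-SVM detector over a precomputed block descriptor. num_windows is a
    // cl::Buffer holding a single unsigned int that the kernel increments per detection.
    void run_hog_detector(const ICLTensor *descriptor, const ICLHOG *hog,
                          ICLDetectionWindowArray *windows, cl::Buffer *num_windows,
                          const Size2D &window_stride)
    {
        CLHOGDetectorKernel detector;
        detector.configure(descriptor, hog, windows, num_windows, window_stride,
                           /*threshold=*/0.0f, /*idx_class=*/0);
        CLScheduler::get().enqueue(detector);
        CLScheduler::get().sync();
    }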
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLHARRISCORNERSKERNEL_H -#define ARM_COMPUTE_CLHARRISCORNERSKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include - -namespace arm_compute -{ -class ICLTensor; -using ICLImage = ICLTensor; - -/** Interface for the harris score kernel. - * - * @note The implementation supports 3, 5, and 7 for the block_size. - */ -class CLHarrisScoreKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLHarrisScoreKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHarrisScoreKernel(const CLHarrisScoreKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHarrisScoreKernel &operator=(const CLHarrisScoreKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHarrisScoreKernel(CLHarrisScoreKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHarrisScoreKernel &operator=(CLHarrisScoreKernel &&) = default; - /** Default destructor */ - ~CLHarrisScoreKernel() = default; - - /** Setup the kernel parameters - * - * @param[in] input1 Source image (gradient X). Data types supported S16, S32. (Must be the same as input2) - * @param[in] input2 Source image (gradient Y). Data types supported S16, S32. (Must be the same as input1) - * @param[out] output Destination image (harris score). Data types supported F32 - * @param[in] block_size The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7 - * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0) - * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). - * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLImage *input1, const ICLImage *input2, ICLImage *output, - int32_t block_size, float norm_factor, float strength_thresh, float sensitivity, - bool border_undefined); - /** Setup the kernel parameters - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 Source image (gradient X). Data types supported S16, S32. (Must be the same as input2) - * @param[in] input2 Source image (gradient Y). Data types supported S16, S32. (Must be the same as input1) - * @param[out] output Destination image (harris score). Data types supported F32 - * @param[in] block_size The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7 - * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0) - * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). - * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const CLCompileContext &compile_context, const ICLImage *input1, const ICLImage *input2, ICLImage *output, - int32_t block_size, float norm_factor, float strength_thresh, float sensitivity, - bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -protected: - const ICLImage *_input1; /**< Source image - Gx component */ - const ICLImage *_input2; /**< Source image - Gy component */ - ICLImage *_output; /**< Source image - Harris score */ - float _sensitivity; /**< Sensitivity value */ - float _strength_thresh; /**< Threshold value */ - float _norm_factor; /**< Normalization factor */ - BorderSize _border_size; /**< Border size */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLHARRISCORNERSKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h deleted file mode 100644 index f362441944..0000000000 --- a/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H -#define ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -/** Interface for the height concatenate kernel. - * The input tensor will be concatenated into the output tensor. 
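A minimal sketch of the CLHarrisScoreKernel interface removed above, assuming the pre-move include path; the gradient images would normally come from a Sobel or Scharr kernel, and the normalisation factor below is a placeholder rather than the value the CLHarrisCorners function derives from the gradient and block sizes:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h" // pre-move path
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        // S16 gradients from a 3x3 operator and the F32 Harris score output.
        CLTensor gx, gy, score;
        gx.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::S16));
        gy.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::S16));
        score.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::F32));

        const float norm_factor = 1.0f / 255.0f; // placeholder; must simply be non-zero here

        CLHarrisScoreKernel harris;
        harris.configure(&gx, &gy, &score, /*block_size=*/3, norm_factor,
                         /*strength_thresh=*/1e-5f, /*sensitivity=*/0.04f, /*border_undefined=*/true);

        gx.allocator()->allocate();
        gy.allocator()->allocate();
        score.allocator()->allocate();
        CLScheduler::get().enqueue(harris);
        CLScheduler::get().sync();
        return 0;
    }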
- */ -class CLHeightConcatenateLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLHeightConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHeightConcatenateLayerKernel(const CLHeightConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHeightConcatenateLayerKernel &operator=(const CLHeightConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHeightConcatenateLayerKernel(CLHeightConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHeightConcatenateLayerKernel &operator=(CLHeightConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~CLHeightConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[in] height_offset The starting offset on the Y axis for the output tensor. - * @param[out] output Output tensor. Data types supported: Same as @p input. - * - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int height_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLHeightConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] height_offset The starting offset on the Y axis for the output tensor. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; - -private: - unsigned int _height_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLHistogramKernel.h b/arm_compute/core/CL/kernels/CLHistogramKernel.h deleted file mode 100644 index 7cb79db6e9..0000000000 --- a/arm_compute/core/CL/kernels/CLHistogramKernel.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
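The height-concatenate kernel above already follows the operator-style interface: configure() takes ITensorInfo and the tensors are supplied at run time through a pack. The sketch below assumes the pre-move include path, CLKernelLibrary::get().get_compile_context(), CLScheduler::enqueue_op() and the ACL_SRC / ACL_DST pack slots as they existed around this release; sizes are illustrative:

    #include "arm_compute/core/ITensorPack.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/experimental/Types.h"
    #include "arm_compute/core/CL/CLKernelLibrary.h"
    #include "arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h" // pre-move path
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        // Copy a 16x8 tensor into rows [8, 16) of a 16x16 destination (height_offset = 8).
        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 8U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));

        CLHeightConcatenateLayerKernel concat;
        concat.configure(CLKernelLibrary::get().get_compile_context(), src.info(), /*height_offset=*/8, dst.info());

        src.allocator()->allocate();
        dst.allocator()->allocate();

        // Operator-style execution: tensors are bound at run time via an ITensorPack.
        ITensorPack pack;
        pack.add_tensor(TensorType::ACL_SRC, &src);
        pack.add_tensor(TensorType::ACL_DST, &dst);
        CLScheduler::get().enqueue_op(concat, pack);
        CLScheduler::get().sync();
        return 0;
    }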
- */ -#ifndef ARM_COMPUTE_CLHISTOGRAMKERNEL_H -#define ARM_COMPUTE_CLHISTOGRAMKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLDistribution1D; -class ICLTensor; -using ICLImage = ICLTensor; - -/** Interface to run the histogram kernel. This kernel processes the part of image with width can be divided by 16. - * If the image width is not a multiple of 16, remaining pixels have to be processed with the @ref CLHistogramBorderKernel - */ -class CLHistogramKernel : public ICLKernel -{ -public: - /** Constructor */ - CLHistogramKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHistogramKernel(const CLHistogramKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHistogramKernel &operator=(const CLHistogramKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHistogramKernel(CLHistogramKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHistogramKernel &operator=(CLHistogramKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source image. Data types supported: U8. - * @param[out] output Destination distribution. - */ - void configure(const ICLImage *input, ICLDistribution1D *output); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source image. Data types supported: U8. - * @param[out] output Destination distribution. - */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLImage *_input; - ICLDistribution1D *_output; -}; - -/** Interface to run the histogram kernel to handle the leftover part of image - * - */ -class CLHistogramBorderKernel : public ICLKernel -{ -public: - /** Constructor */ - CLHistogramBorderKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHistogramBorderKernel(const CLHistogramBorderKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHistogramBorderKernel &operator=(const CLHistogramBorderKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHistogramBorderKernel(CLHistogramBorderKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHistogramBorderKernel &operator=(CLHistogramBorderKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source image. Data types supported: U8. - * @param[out] output Destination distribution. - */ - void configure(const ICLImage *input, ICLDistribution1D *output); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source image. Data types supported: U8. - * @param[out] output Destination distribution. 
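As a plain-C++ sketch of the work split described for the histogram kernels above (hypothetical helper, independent of the CL types): the main kernel covers the widest prefix whose width is a multiple of 16, and the border kernel covers the leftover columns.

#include <array>
#include <cstddef>
#include <cstdint>

// Accumulate a 256-bin histogram of a U8 image, splitting each row the way the
// two kernels above divide the work. `bins` is expected to start zero-initialised.
inline void histogram_u8(const uint8_t *img, std::size_t width, std::size_t height, std::size_t stride,
                         std::array<uint32_t, 256> &bins)
{
    const std::size_t main_width = (width / 16) * 16; // region handled by the main histogram kernel
    for (std::size_t y = 0; y < height; ++y)
    {
        const uint8_t *row = img + y * stride;
        for (std::size_t x = 0; x < main_width; ++x)
        {
            ++bins[row[x]]; // vectorisable part: width is a multiple of 16
        }
        for (std::size_t x = main_width; x < width; ++x)
        {
            ++bins[row[x]]; // leftover columns: the border kernel's share
        }
    }
}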
- */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLImage *_input; - ICLDistribution1D *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLHISTOGRAMKERNEL_H*/ diff --git a/arm_compute/core/CL/kernels/CLIm2ColKernel.h b/arm_compute/core/CL/kernels/CLIm2ColKernel.h deleted file mode 100644 index 7b7bd03108..0000000000 --- a/arm_compute/core/CL/kernels/CLIm2ColKernel.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLIM2COLKERNEL_H -#define ARM_COMPUTE_CLIM2COLKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Size2D.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the im2col reshape kernel. - * - * Rearranges image blocks into columns. It is used to strip out each convolution block to a single column. - * It is used to transform a convolution to a plain matrix multiplication. - * - * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have: - * @f[ - * \left( \begin{array}{cccc} - * a00 & a01 & a02 & a03 \\ - * a10 & a11 & a12 & a13 \\ - * a20 & a21 & a22 & a23 \\ - * a30 & a31 & a32 & a33 \\ - * \end{array} \right) - * = - * \left( \begin{array}{ccccccccc} - * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\ - * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\ - * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\ - * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\ - * \end{array} \right) - * @f] - */ -class CLIm2ColKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLIm2ColKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLIm2ColKernel(const CLIm2ColKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLIm2ColKernel &operator=(const CLIm2ColKernel &) = delete; - /** Allow instances of this class to be moved */ - CLIm2ColKernel(CLIm2ColKernel &&) = default; - /** Allow instances of this class to be moved */ - CLIm2ColKernel &operator=(CLIm2ColKernel &&) = default; - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to convert. 
3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, - * while every dimension above represents a batch. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. - * This is valid only for non-quantized inputs. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. - * Number of groups other than 1 is only supported for NCHW data layout. - * Number of groups should be multiple to the number of channels. - */ - void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U), - unsigned int num_groups = 1); - /** Set the input and output of the kernel. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, - * while every dimension above represents a batch. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, - const Size2D &dilation = Size2D(1U, 1U), - unsigned int num_groups = 1); - /** Static function to check if given info will lead to a valid configuration of @ref CLIm2ColKernel - * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, - * while every dimension above represents a batch. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. - * This is valid only for non-quantized inputs. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
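A compact scalar sketch of the rearrangement shown in the matrix example above (3x3 blocks, stride 1, no padding, single channel); this is a hypothetical helper, not the kernel's actual OpenCL implementation.

#include <cstddef>
#include <vector>

// Turn every 3x3 patch of an h x w image into one row of a ((h-2)*(w-2)) x 9 matrix,
// so that a 3x3 convolution becomes a plain matrix multiplication.
inline std::vector<float> im2col_3x3(const std::vector<float> &img, std::size_t h, std::size_t w)
{
    const std::size_t out_h = h - 2;
    const std::size_t out_w = w - 2;
    std::vector<float> cols(out_h * out_w * 9);
    std::size_t row = 0;
    for (std::size_t y = 0; y < out_h; ++y)
    {
        for (std::size_t x = 0; x < out_w; ++x, ++row)
        {
            std::size_t k = 0;
            for (std::size_t dy = 0; dy < 3; ++dy)
            {
                for (std::size_t dx = 0; dx < 3; ++dx)
                {
                    cols[row * 9 + k++] = img[(y + dy) * w + (x + dx)];
                }
            }
        }
    }
    return cols;
}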
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. - * Number of groups other than 1 is only supported for NCHW data layout. - * Number of groups should be multiple to the number of channels. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U), - unsigned int num_groups = 1); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -public: - const ICLTensor *_input; - ICLTensor *_output; - DataLayout _data_layout; - std::pair _convolved_dims; - unsigned int _num_elems_processed_per_iteration; - Size2D _kernel_dims; - PadStrideInfo _conv_info; - unsigned int _num_groups; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLIM2COLKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h deleted file mode 100644 index a3fdd3c4e7..0000000000 --- a/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include "arm_compute/core/KernelDescriptors.h" - -namespace arm_compute -{ -// Forward declarations -class ICLTensor; - -/** Interface for performing an instance normalization */ -class CLInstanceNormalizationLayerKernel : public ICLKernel -{ -public: - /** Constructor */ - CLInstanceNormalizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLInstanceNormalizationLayerKernel(const CLInstanceNormalizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLInstanceNormalizationLayerKernel &operator=(const CLInstanceNormalizationLayerKernel &) = delete; - /** Default Move Constructor. 
*/ - CLInstanceNormalizationLayerKernel(CLInstanceNormalizationLayerKernel &&) = default; - /** Default move assignment operator */ - CLInstanceNormalizationLayerKernel &operator=(CLInstanceNormalizationLayerKernel &&) = default; - /** Default destructor */ - ~CLInstanceNormalizationLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC - * In case of @p output tensor = nullptr this tensor will store the result of the normalization. - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. - * @param[in] info Kernel meta-data descriptor - */ - void configure(ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC - * In case of @p output tensor = nullptr this tensor will store the result of the normalization. - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. - * @param[in] info Kernel meta-data descriptor - */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref CLInstanceNormalizationLayer. - * - * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported: NHWC, NCHW - * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. - * @param[in] info Kernel meta-data descriptor - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_input; - ICLTensor *_output; - bool _run_in_place; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLIntegralImageKernel.h b/arm_compute/core/CL/kernels/CLIntegralImageKernel.h deleted file mode 100644 index cef699ab54..0000000000 --- a/arm_compute/core/CL/kernels/CLIntegralImageKernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H -#define ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface to run the horizontal pass of the integral image kernel. */ -class CLIntegralImageHorKernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output Destination tensor, Data types supported: U32. - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output Destination tensor, Data types supported: U32. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); -}; - -/** Interface to run the vertical pass of the integral image kernel. */ -class CLIntegralImageVertKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLIntegralImageVertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLIntegralImageVertKernel(const CLIntegralImageVertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLIntegralImageVertKernel &operator=(const CLIntegralImageVertKernel &) = delete; - /** Allow instances of this class to be moved */ - CLIntegralImageVertKernel(CLIntegralImageVertKernel &&) = default; - /** Allow instances of this class to be moved */ - CLIntegralImageVertKernel &operator=(CLIntegralImageVertKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in,out] in_out The input/output tensor. Data types supported: U32 - */ - void configure(ICLTensor *in_out); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in,out] in_out The input/output tensor. Data types supported: U32 - */ - void configure(const CLCompileContext &compile_context, ICLTensor *in_out); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_in_out; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h b/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h deleted file mode 100644 index 55fe563954..0000000000 --- a/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H -#define ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for performing a L2 normalize on a given axis given the square sum of it in this axis */ -class CLL2NormalizeLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLL2NormalizeLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLL2NormalizeLayerKernel(const CLL2NormalizeLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLL2NormalizeLayerKernel &operator=(const CLL2NormalizeLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLL2NormalizeLayerKernel(CLL2NormalizeLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLL2NormalizeLayerKernel &operator=(CLL2NormalizeLayerKernel &&) = default; - /** Default destructor */ - ~CLL2NormalizeLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. - * @param[in] sum Sum values tensor. Data types supported: same as @p input. - * Sum will have the same number of dimensions as input. - * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 - * @param[in] epsilon Lower bound value for the normalization. - */ - void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. - * @param[in] sum Sum values tensor. Data types supported: same as @p input. - * Sum will have the same number of dimensions as input. - * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. 
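A scalar sketch of the normalisation these parameters describe, assuming epsilon acts as a lower bound on the denominator (the kernel's exact clamping may differ); hypothetical helper only.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Normalise one row by the square root of its precomputed sum of squares,
// never dividing by anything smaller than `epsilon`.
inline void l2_normalize_row(const std::vector<float> &in, float sum_of_squares, float epsilon,
                             std::vector<float> &out)
{
    const float inv_norm = 1.0f / std::max(std::sqrt(sum_of_squares), epsilon);
    out.resize(in.size());
    for (std::size_t i = 0; i < in.size(); ++i)
    {
        out[i] = in[i] * inv_norm;
    }
}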
Negative values wrap around. Maximum supported actual reduction axis : 2 - * @param[in] epsilon Lower bound value for the normalization. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon); - - /** Static function to check if given info will lead to a valid configuration of @ref CLL2NormalizeLayerKernel. - * - * @param[in] input Source tensor info. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. - * @param[in] sum Sum values tensor info. Data types supported: same as @p input. - * Sum will have the same number of dimensions as input. - * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 - * @param[in] epsilon Lower bound value for the normalization. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_sum; - ICLTensor *_output; - unsigned int _actual_axis; - float _epsilon; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLLKTrackerKernel.h b/arm_compute/core/CL/kernels/CLLKTrackerKernel.h deleted file mode 100644 index fdc2ef8333..0000000000 --- a/arm_compute/core/CL/kernels/CLLKTrackerKernel.h +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLLKTRACKERKERNEL_H -#define ARM_COMPUTE_CLLKTRACKERKERNEL_H - -#include "arm_compute/core/CL/ICLArray.h" -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -#include <cstddef> -#include <cstdint> - -namespace arm_compute -{ -class ICLTensor; - -/** Internal keypoint structure for Lucas-Kanade Optical Flow */ -struct CLLKInternalKeypoint -{ - float x{ 0.f }; /**< x coordinate of the keypoint */ - float y{ 0.f }; /**< y coordinate of the keypoint */ - float tracking_status{ 0.f }; /**< the tracking status of the keypoint */ - float dummy{ 0.f }; /**< Dummy field, to make sure the data structure 128-bit align, so that GPU can use vload4 */ -}; - -/** Structure for storing Spatial Gradient Matrix and the minimum eigenvalue for each keypoint */ -struct CLCoefficientTable -{ - float A11; /**< iA11 * FLT_SCALE */ - float A12; /**< iA11 * FLT_SCALE */ - float A22; /**< iA11 * FLT_SCALE */ - float min_eig; /**< Minimum eigenvalue */ -}; - -/** Structure for storing ival, ixval and iyval for each point inside the window */ -struct CLOldValue -{ - int16_t ival; /**< ival extracts from old image */ - int16_t ixval; /**< ixval extracts from scharr Gx image */ - int16_t iyval; /**< iyval extracts from scharr Gy image */ - int16_t dummy; /**< Dummy field, to make sure the data structure 128-bit align, so that GPU can use vload4 */ -}; - -/** Interface for OpenCL Array of Internal Key Points. */ -using ICLLKInternalKeypointArray = ICLArray<CLLKInternalKeypoint>; -/** Interface for OpenCL Array of Coefficient Tables. */ -using ICLCoefficientTableArray = ICLArray<CLCoefficientTable>; -/** Interface for OpenCL Array of Old Values. */ -using ICLOldValArray = ICLArray<CLOldValue>; - -/** Interface to run the initialization step of LKTracker */ -class CLLKTrackerInitKernel : public ICLKernel -{ -public: - /** Initialise the kernel input and output - * - * @param[in] old_points Pointer to the @ref ICLKeyPointArray storing old key points - * @param[in] new_points_estimates Pointer to the @ref ICLKeyPointArray storing new estimates key points - * @param[out] old_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint old points - * @param[out] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points - * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used - * @param[in] level The pyramid level - * @param[in] num_levels The number of pyramid levels - * @param[in] pyramid_scale Scale factor used for generating the pyramid - */ - void configure(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, - ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, - bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale); - /** Initialise the kernel input and output - * - * @param[in] compile_context The compile context to be used.
- * @param[in] old_points Pointer to the @ref ICLKeyPointArray storing old key points - * @param[in] new_points_estimates Pointer to the @ref ICLKeyPointArray storing new estimates key points - * @param[out] old_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint old points - * @param[out] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points - * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used - * @param[in] level The pyramid level - * @param[in] num_levels The number of pyramid levels - * @param[in] pyramid_scale Scale factor used for generating the pyramid - */ - void configure(const CLCompileContext &compile_context, const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, - ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, - bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; -}; - -/** Interface to run the finalize step of LKTracker, where it truncates the coordinates stored in new_points array */ -class CLLKTrackerFinalizeKernel : public ICLKernel -{ -public: - /** Initialise the kernel input and output - * - * @param[in] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points - * @param[out] new_points Pointer to the @ref ICLKeyPointArray storing new key points - */ - void configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points); - /** Initialise the kernel input and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points - * @param[out] new_points Pointer to the @ref ICLKeyPointArray storing new key points - */ - void configure(const CLCompileContext &compile_context, ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; -}; - -/** Interface to run the first stage of LKTracker, where A11, A12, A22, min_eig, ival, ixval and iyval are computed */ -class CLLKTrackerStage0Kernel : public ICLKernel -{ -public: - /** Default constructor */ - CLLKTrackerStage0Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLKTrackerStage0Kernel(const CLLKTrackerStage0Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLKTrackerStage0Kernel &operator=(const CLLKTrackerStage0Kernel &) = delete; - /** Allow instances of this class to be moved */ - CLLKTrackerStage0Kernel(CLLKTrackerStage0Kernel &&) = default; - /** Allow instances of this class to be moved */ - CLLKTrackerStage0Kernel &operator=(CLLKTrackerStage0Kernel &&) = default; - /** Initialise the kernel input and output - * - * @param[in] old_input Pointer to the input old tensor. Data types supported: U8 - * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data types supported: S16 - * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. 
Data types supported: S16 - * @param[in] old_points_internal Pointer to the array of CLLKInternalKeypoint old points - * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint new points - * @param[out] coeff_table Pointer to the array holding the Spatial Gradient coefficients - * @param[out] old_ival Pointer to the array holding internal values - * @param[in] window_dimension The size of the window on which to perform the algorithm - * @param[in] level The pyramid level - */ - void configure(const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy, - ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, - ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, - size_t window_dimension, size_t level); - /** Initialise the kernel input and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] old_input Pointer to the input old tensor. Data types supported: U8 - * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data types supported: S16 - * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. Data types supported: S16 - * @param[in] old_points_internal Pointer to the array of CLLKInternalKeypoint old points - * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint new points - * @param[out] coeff_table Pointer to the array holding the Spatial Gradient coefficients - * @param[out] old_ival Pointer to the array holding internal values - * @param[in] window_dimension The size of the window on which to perform the algorithm - * @param[in] level The pyramid level - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy, - ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, - ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, - size_t window_dimension, size_t level); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_old_input; - const ICLTensor *_old_scharr_gx; - const ICLTensor *_old_scharr_gy; -}; - -/** Interface to run the second stage of LKTracker, where the motion vectors of the given points are computed */ -class CLLKTrackerStage1Kernel : public ICLKernel -{ -public: - /** Default constructor */ - CLLKTrackerStage1Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLKTrackerStage1Kernel(const CLLKTrackerStage1Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLKTrackerStage1Kernel &operator=(const CLLKTrackerStage1Kernel &) = delete; - /** Allow instances of this class to be moved */ - CLLKTrackerStage1Kernel(CLLKTrackerStage1Kernel &&) = default; - /** Allow instances of this class to be moved */ - CLLKTrackerStage1Kernel &operator=(CLLKTrackerStage1Kernel &&) = default; - /** Initialise the kernel input and output - * - * @param[in] new_input Pointer to the input new tensor. Data types supported: U8 - * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint for new points - * @param[in] coeff_table Pointer to the array holding the Spatial Gradient coefficients - * @param[in] old_ival Pointer to the array holding internal values - * @param[in] termination The criteria to terminate the search of each keypoint. 
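A hypothetical scalar sketch of how a termination criterion, tolerance, and iteration budget of this kind typically interact in the Lucas-Kanade refinement loop; the per-iteration step computation is left abstract and is not the kernel's implementation.

#include <cmath>
#include <cstddef>
#include <functional>
#include <utility>

// Refine a keypoint by repeatedly applying an update step until the iteration
// budget is spent or (if requested) the step length drops below `epsilon`.
inline std::pair<float, float> refine_keypoint(std::pair<float, float> point,
                                               const std::function<std::pair<float, float>(const std::pair<float, float> &)> &step,
                                               std::size_t num_iterations, float epsilon, bool stop_on_epsilon)
{
    for (std::size_t i = 0; i < num_iterations; ++i)
    {
        const std::pair<float, float> d = step(point); // motion-vector update from the spatial-gradient system
        point.first += d.first;
        point.second += d.second;
        if (stop_on_epsilon && std::hypot(d.first, d.second) < epsilon)
        {
            break; // converged within the requested tolerance
        }
    }
    return point;
}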
- * @param[in] epsilon The error for terminating the algorithm - * @param[in] num_iterations The maximum number of iterations before terminating the algorithm - * @param[in] window_dimension The size of the window on which to perform the algorithm - * @param[in] level The pyramid level - */ - void configure(const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, - Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level); - /** Initialise the kernel input and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] new_input Pointer to the input new tensor. Data types supported: U8 - * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint for new points - * @param[in] coeff_table Pointer to the array holding the Spatial Gradient coefficients - * @param[in] old_ival Pointer to the array holding internal values - * @param[in] termination The criteria to terminate the search of each keypoint. - * @param[in] epsilon The error for terminating the algorithm - * @param[in] num_iterations The maximum number of iterations before terminating the algorithm - * @param[in] window_dimension The size of the window on which to perform the algorithm - * @param[in] level The pyramid level - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, - Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_new_input; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLLKTRACKERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h deleted file mode 100644 index d5653f83ea..0000000000 --- a/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H -#define ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to multiply each row of first tensor with low 2 dimensions of second tensor. - * - * @attention The second input tensor must have at least 2 dimensions (matrix) - * - */ -class CLLocallyConnectedMatrixMultiplyKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLLocallyConnectedMatrixMultiplyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLocallyConnectedMatrixMultiplyKernel(const CLLocallyConnectedMatrixMultiplyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLocallyConnectedMatrixMultiplyKernel &operator=(const CLLocallyConnectedMatrixMultiplyKernel &) = delete; - /** Allow instances of this class to be moved */ - CLLocallyConnectedMatrixMultiplyKernel(CLLocallyConnectedMatrixMultiplyKernel &&) = default; - /** Allow instances of this class to be moved */ - CLLocallyConnectedMatrixMultiplyKernel &operator=(CLLocallyConnectedMatrixMultiplyKernel &&) = default; - /** Initialise the kernel's input, output and alpha - * - * @param[in] input0 First input tensor. Data types supported: F32 - * @param[in] input1 Second input tensor. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result. Data type supported: same as @p input0 - */ - void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); - /** Initialise the kernel's input, output and alpha - * - * @param[in] compile_context The compile context to be used. - * @param[in] input0 First input tensor. Data types supported: F32 - * @param[in] input1 Second input tensor. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result. Data type supported: same as @p input0 - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLLocallyConnectedMatrixMultiplyKernel - * - * @param[in] input0 First input tensor info. Data types supported: F32 - * @param[in] input1 Second input tensor info. Data type supported: same as @p input0 - * @param[in] output Output tensor info. Data type supported: same as @p input0 - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input0; - const ICLTensor *_input1; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h deleted file mode 100644 index a741b1745a..0000000000 --- a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
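A scalar sketch of the per-row multiplication described for the locally connected kernel above, under the assumption that row i of the first tensor is multiplied by the i-th 2D slice of the second tensor (hypothetical helper).

#include <cstddef>
#include <vector>

// out[i][n] = sum_k in0[i][k] * in1[i][k][n]: each row of `in0` gets its own
// (k x n) weight matrix taken from the lowest two dimensions of `in1`.
inline void locally_connected_matmul(const std::vector<float> &in0, // rows x k
                                     const std::vector<float> &in1, // rows x k x n
                                     std::vector<float> &out,       // rows x n
                                     std::size_t rows, std::size_t k, std::size_t n)
{
    out.assign(rows * n, 0.0f);
    for (std::size_t i = 0; i < rows; ++i)
    {
        for (std::size_t kk = 0; kk < k; ++kk)
        {
            const float a = in0[i * k + kk];
            for (std::size_t j = 0; j < n; ++j)
            {
                out[i * n + j] += a * in1[(i * k + kk) * n + j];
            }
        }
    }
}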
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H -#define ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Template interface for the kernel to compute magnitude and phase. - * - */ -class CLMagnitudePhaseKernel : public ICLKernel -{ -public: - /** Default constructor. */ - CLMagnitudePhaseKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMagnitudePhaseKernel(const CLMagnitudePhaseKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMagnitudePhaseKernel &operator=(const CLMagnitudePhaseKernel &) = delete; - /** Allow instances of this class to be moved */ - CLMagnitudePhaseKernel(CLMagnitudePhaseKernel &&) = default; - /** Allow instances of this class to be moved */ - CLMagnitudePhaseKernel &operator=(CLMagnitudePhaseKernel &&) = default; - /** Initialise the kernel's input, output. - * - * @note At least one of output1 or output2 must be set. - * - * @param[in] gx The input gradient X tensor. Data types supported: S16/S32. - * @param[in] gy The input gradient Y tensor. Data types supported: S16/S32. - * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16/S32. - * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8. - * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM. - * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. - */ - void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, - MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED); - /** Initialise the kernel's input, output. - * - * @note At least one of output1 or output2 must be set. - * - * @param[in] compile_context The compile context to be used. - * @param[in] gx The input gradient X tensor. Data types supported: S16/S32. - * @param[in] gy The input gradient Y tensor. Data types supported: S16/S32. - * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16/S32. - * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8. - * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM. 
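For reference, a scalar sketch of the two quantities these parameters describe, using the conventional definitions (L2 magnitude and gradient angle); the kernel's exact phase quantisation into U8 is not reproduced here, and the [0, 360) wrap-around for the signed phase is an assumption.

#include <cmath>
#include <cstdint>

// Conventional magnitude/phase of a gradient pair: L2NORM magnitude and the
// angle of (gx, gy), wrapped into [0, 360) degrees.
inline void magnitude_phase(int16_t gx, int16_t gy, float &magnitude, float &phase_degrees)
{
    const float fx = static_cast<float>(gx);
    const float fy = static_cast<float>(gy);
    magnitude = std::sqrt(fx * fx + fy * fy);
    float angle = std::atan2(fy, fx) * 180.0f / 3.14159265358979f;
    if (angle < 0.0f)
    {
        angle += 360.0f;
    }
    phase_degrees = angle;
}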
- * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, - MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_gx; /**< Input gradient X. */ - const ICLTensor *_gy; /**< Input gradient Y. */ - ICLTensor *_magnitude; /**< Output - Magnitude. */ - ICLTensor *_phase; /**< Output - Phase. */ - bool _run_mag; /**< Calculate magnitude ? */ - bool _run_phase; /**< Calculate phase ? */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h deleted file mode 100644 index 9d51f6b59c..0000000000 --- a/arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLMAXUNPOOLINGLAYERKERNEL_H -#define ARM_COMPUTE_CLMAXUNPOOLINGLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the pooling layer kernel */ -class CLMaxUnpoolingLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLMaxUnpoolingLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMaxUnpoolingLayerKernel(const CLMaxUnpoolingLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMaxUnpoolingLayerKernel &operator=(const CLMaxUnpoolingLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLMaxUnpoolingLayerKernel(CLMaxUnpoolingLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLMaxUnpoolingLayerKernel &operator=(CLMaxUnpoolingLayerKernel &&) = default; - /** Default destructor */ - ~CLMaxUnpoolingLayerKernel() = default; - /** Set the input and output tensors. - * - * @note Output shape must be equal to the shape of the original input to pool. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. 
Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] indices Tensor containing the offset to store the input elements in the output tensor. - * @ref CLPoolingLayerKernel with indices should precede this function in order to - * properly reconstruct the output tensor. - * The tensor shape of this tensor has to be equal to the input tensor shape. Data type supported: U32. - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, const PoolingLayerInfo &pool_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLMaxUnpoolingLayerKernel - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] output Destination tensor info. Data types supported: Same as @p input. - * @param[in] indices TensorInfo associated to the tensor containing the offset to store the input elements in the output tensor. - * @ref CLPoolingLayerKernel with indices should precede this function in order to - * properly reconstruct the output tensor. - * The tensor shape of this tensor has to be equal to the input tensor shape. Data type supported: U32. - * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info); - - // Inherited methods overridden - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - const ICLTensor *_indices; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLMAXUNPOOLINGLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h deleted file mode 100644 index 2a5a5f2e33..0000000000 --- a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLMEANSTDDEVKERNEL_H -#define ARM_COMPUTE_CLMEANSTDDEVKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace cl -{ -class Buffer; -} - -namespace arm_compute -{ -class ICLTensor; -using ICLImage = ICLTensor; - -/** Interface for the kernel to calculate mean and standard deviation of input image pixels. */ -class CLMeanStdDevKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLMeanStdDevKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMeanStdDevKernel(const CLMeanStdDevKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMeanStdDevKernel &operator=(const CLMeanStdDevKernel &) = delete; - /** Allow instances of this class to be moved */ - CLMeanStdDevKernel(CLMeanStdDevKernel &&) = default; - /** Allow instances of this class to be moved */ - CLMeanStdDevKernel &operator=(CLMeanStdDevKernel &&) = default; - /** Initialise the kernel's input and outputs. - * - * @param[in] input Input image. Data types supported: U8. - * @param[out] mean Input average pixel value. - * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong). - * @param[out] stddev (Optional) Output standard deviation of pixel values. - * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong). - */ - void configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr); - /** Initialise the kernel's input and outputs. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input image. Data types supported: U8. - * @param[out] mean Input average pixel value. - * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong). - * @param[out] stddev (Optional) Output standard deviation of pixel values. - * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong). - */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevKernel. - * - * @param[in] input Input image info. Data types supported: U8. - * @param[in] mean Input average pixel value. - * @param[in] global_sum Keeps global sum of pixel values. - * @param[in] stddev (Optional) Output standard deviation of pixel values. - * @param[in] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values. 
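The two buffers described above combine into the final statistics through the standard identity Var(X) = E[X^2] - E[X]^2; a host-side sketch of that reduction (hypothetical helper, not the kernel itself):

#include <cmath>
#include <cstdint>

// Turn the accumulated global sums into mean and standard deviation.
inline void finalize_mean_stddev(uint64_t sum, uint64_t sum_squared, uint64_t num_pixels,
                                 float &mean, float &stddev)
{
    mean = static_cast<float>(sum) / static_cast<float>(num_pixels);
    const float mean_of_squares = static_cast<float>(sum_squared) / static_cast<float>(num_pixels);
    stddev = std::sqrt(mean_of_squares - mean * mean);
}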
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - - BorderSize border_size() const override; - -private: - const ICLImage *_input; - float *_mean; - float *_stddev; - cl::Buffer *_global_sum; - cl::Buffer *_global_sum_squared; - BorderSize _border_size; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLMEANSTDDEVKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h b/arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h deleted file mode 100644 index ff5e9ab0f7..0000000000 --- a/arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H -#define ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to normalize the input 2D tensor across the first dimension with respect to mean and standard deviation of the same dimension. */ -class CLMeanStdDevNormalizationKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLMeanStdDevNormalizationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMeanStdDevNormalizationKernel(const CLMeanStdDevNormalizationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMeanStdDevNormalizationKernel &operator=(const CLMeanStdDevNormalizationKernel &) = delete; - /** Allow instances of this class to be moved */ - CLMeanStdDevNormalizationKernel(CLMeanStdDevNormalizationKernel &&) = default; - /** Allow instances of this class to be moved */ - CLMeanStdDevNormalizationKernel &operator=(CLMeanStdDevNormalizationKernel &&) = default; - /** Default destructor */ - ~CLMeanStdDevNormalizationKernel() = default; - /** Initialise the kernel's input and outputs. - * - * @note If the output tensor is a nullptr, the normalization will be performed in-place. - * - * @param[in, out] input Source tensor with 2 dimensions. 
In case of @p output tensor = nullptr, - * this tensor will store the result of the normalization. Data types supported: F16/F32. - * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input - * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. - */ - void configure(ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f); - /** Initialise the kernel's input and outputs. - * - * @note If the output tensor is a nullptr, the normalization will be performed in-place. - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] input Source tensor with 2 dimensions. In case of @p output tensor = nullptr, - * this tensor will store the result of the normalization. Data types supported: F16/F32. - * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input - * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. - */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f); - /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevNormalizationKernel - * - * @param[in] input Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr, - * this tensor will store the result of the normalization. Data types supported: F16/F32. - * @param[in] output (Optional) Destination tensor info. It can be nullptr in case of in-place computation. Data type supported: same as @p input - * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output = nullptr, float epsilon = 1e-8f); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_input; - ICLTensor *_output; - bool _run_in_place; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h b/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h deleted file mode 100644 index ccb475360f..0000000000 --- a/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLMEDIAN3X3KERNEL_H -#define ARM_COMPUTE_CLMEDIAN3X3KERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the median 3x3 filter kernel. - * - */ -class CLMedian3x3Kernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLMEDIAN3X3KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLMemsetKernel.h b/arm_compute/core/CL/kernels/CLMemsetKernel.h deleted file mode 100644 index 5bda480306..0000000000 --- a/arm_compute/core/CL/kernels/CLMemsetKernel.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
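A short sketch of the 3x3 median interface declared above, run on U8 tensors with an undefined border; the helper name and the surrounding tensor/scheduler setup are assumed for illustration.

#include "arm_compute/core/CL/kernels/CLMedian3x3Kernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

// Hypothetical helper; src and dst are assumed to be initialised, allocated U8 CLTensors.
void median_filter_u8(CLTensor &src, CLTensor &dst)
{
    CLMedian3x3Kernel median;
    // border_undefined = true: the caller accepts an undefined border (see border_size()) instead of filling it.
    median.configure(&src, &dst, true);

    CLScheduler::get().enqueue(median);
}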
- */ -#ifndef ARM_COMPUTE_CLMEMSETKERNEL_H -#define ARM_COMPUTE_CLMEMSETKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for filling the planes of a tensor */ -class CLMemsetKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLMemsetKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMemsetKernel(const CLMemsetKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMemsetKernel &operator=(const CLMemsetKernel &) = delete; - /** Allow instances of this class to be moved */ - CLMemsetKernel(CLMemsetKernel &&) = default; - /** Allow instances of this class to be moved */ - CLMemsetKernel &operator=(CLMemsetKernel &&) = default; - /** Default destructor */ - ~CLMemsetKernel() = default; - - /** Initialise the kernel's tensor and filling value - * - * @param[in,out] tensor Input tensor to fill. Supported data types: All. - * @param[in] constant_value The value used to fill the planes of the tensor - * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr. - */ - void configure(ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr); - /** Initialise the kernel's tensor and filling value - * - * @param[in] compile_context The compile context to be used. - * @param[in,out] tensor Input tensor to fill. Supported data types: All. - * @param[in] constant_value The value used to fill the planes of the tensor - * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr. - */ - void configure(const CLCompileContext &compile_context, ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref CLMemsetKernel - * - * @param[in] tensor Source tensor info. Data types supported: All. - * @param[in] constant_value The value used to fill the planes of the tensor - * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr. - * - * @return a status - */ - static Status validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *window = nullptr); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_tensor; - Window _full_window; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLMEMSETRKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h b/arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h deleted file mode 100644 index a693cfdb27..0000000000 --- a/arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. 
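For the fill kernel declared above, a minimal sketch that zero-fills a whole tensor using the default PixelValue() and a null window; the helper name and the already-initialised CLTensor are assumptions for illustration.

#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

// Hypothetical helper; tensor is assumed to be initialised and allocated.
void zero_fill(CLTensor &tensor)
{
    CLMemsetKernel memset_kernel;
    // A default-constructed PixelValue() is all-zero; leaving window at nullptr fills the whole tensor.
    memset_kernel.configure(&tensor, PixelValue());

    CLScheduler::get().enqueue(memset_kernel);
}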
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLMINMAXLAYERKERNEL_H -#define ARM_COMPUTE_CLMINMAXLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to perform min max search on a 3D tensor. - */ -class CLMinMaxLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLMinMaxLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMinMaxLayerKernel(const CLMinMaxLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMinMaxLayerKernel &operator=(const CLMinMaxLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLMinMaxLayerKernel(CLMinMaxLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLMinMaxLayerKernel &operator=(CLMinMaxLayerKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.Data types supported: F32. - * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor. - * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32. - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.Data types supported: F32. - * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor. - * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLMinMaxLayerKernel - * - * @param[in] input Input tensor info. Data types supported: F32. - * @param[in] output Output tensor info with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor. 
- * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - /** Resets global minimum and maximum - * - * @param[in,out] queue Command queue on which to map and unmap the min_max tensor - */ - void reset(cl::CommandQueue &queue); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLMINMAXLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h b/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h deleted file mode 100644 index fbcf69752c..0000000000 --- a/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H -#define ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H - -#include "arm_compute/core/CL/ICLArray.h" -#include "arm_compute/core/CL/ICLKernel.h" - -#include - -namespace arm_compute -{ -class ICLTensor; -using ICLImage = ICLTensor; - -/** Interface for the kernel to perform min max search on an image. - */ -class CLMinMaxKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLMinMaxKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMinMaxKernel(const CLMinMaxKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMinMaxKernel &operator=(const CLMinMaxKernel &) = delete; - /** Allow instances of this class to be moved */ - CLMinMaxKernel(CLMinMaxKernel &&) = default; - /** Allow instances of this class to be moved */ - CLMinMaxKernel &operator=(CLMinMaxKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input Image. Data types supported: U8/S16/F32. - * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32. - */ - void configure(const ICLImage *input, cl::Buffer *min_max); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. 
- * @param[in] input Input Image. Data types supported: U8/S16/F32. - * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32. - */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Input image. */ - cl::Buffer *_min_max; /**< Minimum/maximum value. */ - std::array _data_type_max_min; /**< Maximum and minimum data type value respectively. */ -}; - -/** Interface for the kernel to find min max locations of an image. - */ -class CLMinMaxLocationKernel : public ICLKernel -{ -public: - /** Constructor */ - CLMinMaxLocationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMinMaxLocationKernel(const CLMinMaxLocationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMinMaxLocationKernel &operator=(const CLMinMaxLocationKernel &) = delete; - /** Allow instances of this class to be moved */ - CLMinMaxLocationKernel(CLMinMaxLocationKernel &&) = default; - /** Allow instances of this class to be moved */ - CLMinMaxLocationKernel &operator=(CLMinMaxLocationKernel &&) = default; - /** Initialise the kernel's input and outputs. - * - * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size. - * - * @param[in] input Input image. Data types supported: U8/S16/F32. - * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] min_max_count Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32 - * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations. - * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations. - */ - void configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, - ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr); - /** Initialise the kernel's input and outputs. - * - * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input image. Data types supported: U8/S16/F32. - * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] min_max_count Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32 - * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations. - * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations. 
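A sketch of how the two kernels above are intended to be chained: CLMinMaxKernel fills the min_max buffer that CLMinMaxLocationKernel then consumes. Buffer sizes follow the parameter descriptions for a U8 input; the helper name and surrounding setup are assumed for illustration.

#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

// Hypothetical helper; image is assumed to be an initialised, allocated U8 CLTensor.
void min_max_of_u8_image(CLTensor &image)
{
    // For U8 input the documented buffer element type is S32: 2 values (min, max) and 2 occurrence counters.
    cl::Buffer min_max(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR, 2 * sizeof(cl_int));
    cl::Buffer min_max_count(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR, 2 * sizeof(cl_int));

    CLMinMaxKernel         min_max_kernel;
    CLMinMaxLocationKernel location_kernel;
    min_max_kernel.configure(&image, &min_max);
    location_kernel.configure(&image, &min_max, &min_max_count); // min_loc / max_loc arrays left at nullptr

    CLScheduler::get().enqueue(min_max_kernel, false); // do not flush yet
    CLScheduler::get().enqueue(location_kernel);       // consumes the min_max buffer written by the first kernel
}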
- */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, - ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLImage *_input; /**< Input image. */ - cl::Buffer *_min_max_count; /**< Minimum/maximum value occurrences. */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h b/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h deleted file mode 100644 index cee64480b6..0000000000 --- a/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H -#define ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" -#include "arm_compute/core/Types.h" - -#include - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to apply a non-linear filter */ -class CLNonLinearFilterKernel : public ICLSimple2DKernel -{ -public: - /** Default constructor */ - CLNonLinearFilterKernel(); - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data types supported: U8 - * @param[out] output Destination tensor. Data types supported: U8 - * @param[in] function Non linear function to perform - * @param[in] mask_size Mask size. Supported sizes: 3, 5 - * @param[in] pattern Mask pattern - * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, - unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, - bool border_undefined); - /** Set the source, destination and border mode of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8 - * @param[out] output Destination tensor. Data types supported: U8 - * @param[in] function Non linear function to perform - * @param[in] mask_size Mask size. 
Supported sizes: 3, 5 - * @param[in] pattern Mask pattern - * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, - unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, - bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; - -private: - BorderSize _border_size; /**< Border size */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h b/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h deleted file mode 100644 index d1bba4f480..0000000000 --- a/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H -#define ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface to perform Non-Maxima suppression over a 3x3 window using OpenCL - * - * @note Used by @ref CLFastCorners and @ref CLHarrisCorners - */ -class CLNonMaximaSuppression3x3Kernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's sources, destinations and border mode. - * - * @param[in] input Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor) - * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor) - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /** Initialise the kernel's sources, destinations and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor) - * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor) - * @param[in] border_undefined True if the border mode is undefined. 
False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h deleted file mode 100644 index 6233d28b0a..0000000000 --- a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the normalization layer kernel. - */ -class CLNormalizationLayerKernel : public ICLKernel -{ -public: - /** Constructor */ - CLNormalizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLNormalizationLayerKernel(const CLNormalizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLNormalizationLayerKernel &operator=(const CLNormalizationLayerKernel &) = delete; - /** Default Move Constructor. */ - CLNormalizationLayerKernel(CLNormalizationLayerKernel &&) = default; - /** Default move assignment operator */ - CLNormalizationLayerKernel &operator=(CLNormalizationLayerKernel &&) = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input. - * Data layouts supported: same as @p input. - * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. - */ - void configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. 
- * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input. - * Data layouts supported: same as @p input. - * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizationLayerKernel - * - * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. - * @param[in] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input. - * Data layouts supported: same as @p input. - * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, NormalizationLayerInfo norm_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - BorderSize _border_size; - bool _is_norm_across_width; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h deleted file mode 100644 index 2e2e60df0b..0000000000 --- a/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
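To illustrate the normalization interface above, a sketch that configures a 5-element cross-map LRN over F32 NCHW tensors; the helper name, shapes, and the assumption that dst is pre-initialised to match src are illustrative only.

#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

// Hypothetical helper; src and dst are assumed initialised with the same F32 NCHW shape and allocated.
void lrn_cross_map(CLTensor &src, CLTensor &dst)
{
    // 5-element cross-map normalization with the default alpha/beta/kappa.
    const NormalizationLayerInfo info(NormType::CROSS_MAP, 5U);

    CLNormalizationLayerKernel norm;
    norm.configure(&src, &dst, info);

    CLScheduler::get().enqueue(norm);
}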
- */ -#ifndef ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H -#define ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the NormalizePlanarYUV layer kernel. */ -class CLNormalizePlanarYUVLayerKernel : public ICLKernel -{ -public: - /** Constructor */ - CLNormalizePlanarYUVLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLNormalizePlanarYUVLayerKernel(const CLNormalizePlanarYUVLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLNormalizePlanarYUVLayerKernel &operator=(const CLNormalizePlanarYUVLayerKernel &) = delete; - /** Default Move Constructor. */ - CLNormalizePlanarYUVLayerKernel(CLNormalizePlanarYUVLayerKernel &&) = default; - /** Default move assignment operator */ - CLNormalizePlanarYUVLayerKernel &operator=(CLNormalizePlanarYUVLayerKernel &&) = default; - /** Default destructor */ - ~CLNormalizePlanarYUVLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels]. - * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] mean Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input - * @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels. - * Data types supported: same as @p input - */ - void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels]. - * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] mean Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input - * @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels. - * Data types supported: same as @p input - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std); - /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizePlanarYUVLayerKernel - * - * @param[in] input Source tensor info. 3 lower dimensions represent a single input with dimensions [width, height, channels]. - * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor info. Data type supported: same as @p input - * @param[in] mean Mean values tensor info. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input - * @param[in] std Standard deviation values tensor info. 1 dimension with size equal to the number of input channels. 
- * Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - const ICLTensor *_mean; - const ICLTensor *_std; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLPadLayerKernel.h b/arm_compute/core/CL/kernels/CLPadLayerKernel.h deleted file mode 100644 index 5bf5841803..0000000000 --- a/arm_compute/core/CL/kernels/CLPadLayerKernel.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLPADLAYERKERNEL_H -#define ARM_COMPUTE_CLPADLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the PadLayer function. */ -class CLPadLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLPadLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPadLayerKernel(const CLPadLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPadLayerKernel &operator=(const CLPadLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLPadLayerKernel(CLPadLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLPadLayerKernel &operator=(CLPadLayerKernel &&) = default; - /** Default destructor */ - ~CLPadLayerKernel() = default; - /** Set the input and output tensor. - * - * @param[in] input Source tensor. Data types supported: All. - * @param[out] output Output tensor. Data type supported: same as @p input - * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] - * specifies the front and the end padding in the i-th dimension. - * @param[in] constant_value (Optional) Constant value to be used for the padding. - * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT, - * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT). 
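Because the padding pairs above are easy to misread, a small sketch: pad one element at the front and end of the first two dimensions with the default constant value and mode. The helper name and the pre-initialised tensors are assumptions; validate() takes the same arguments on ITensorInfo.

#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

// Hypothetical helper; src is initialised and allocated, dst is assumed initialised to the padded output shape.
void pad_constant(CLTensor &src, CLTensor &dst)
{
    // padding[i] = {front, end} for the i-th dimension: here 1 element on each side of dims 0 and 1.
    const PaddingList padding = { { 1, 1 }, { 1, 1 } };

    CLPadLayerKernel pad;
    pad.configure(&src, &dst, padding); // constant_value defaults to PixelValue(), mode to PaddingMode::CONSTANT

    CLScheduler::get().enqueue(pad);
}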
- */ - void configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT); - /** Set the input and output tensor. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: All. - * @param[out] output Output tensor. Data type supported: same as @p input - * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] - * specifies the front and the end padding in the i-th dimension. - * @param[in] constant_value (Optional) Constant value to be used for the padding. - * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT, - * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT). - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), - PaddingMode mode = PaddingMode::CONSTANT); - /** Static function to check if given info will lead to a valid configuration of @ref CLPadLayerKernel - * - * @param[in] input Source tensor info. Data types supported: All. - * @param[in] output Output tensor info. Data type supported: same as @p input - * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] - * specifies the front and the end padding in the i-th dimension. - * @param[in] constant_value (Optional) Constant value to be used for the padding. - * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT, - * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT). - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - int _input_start_x; - int _input_start_y; - bool _4d_enabled; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLPADLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLPermuteKernel.h b/arm_compute/core/CL/kernels/CLPermuteKernel.h deleted file mode 100644 index bb841b1c83..0000000000 --- a/arm_compute/core/CL/kernels/CLPermuteKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLPERMUTEKERNEL_H -#define ARM_COMPUTE_CLPERMUTEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform tensor permutation. - * - * Permutes given a permutation vector - */ -class CLPermuteKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLPermuteKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPermuteKernel(const CLPermuteKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPermuteKernel &operator=(const CLPermuteKernel &) = delete; - /** Allow instances of this class to be moved */ - CLPermuteKernel(CLPermuteKernel &&) = default; - /** Allow instances of this class to be moved */ - CLPermuteKernel &operator=(CLPermuteKernel &&) = default; - /** Set the input and output of the kernel. - * - * @note Arbitrary permutation vectors are supported with rank not greater than 4 - * - * @param[in] input The input tensor to permute. Data types supported: All. - * @param[in] output The output tensor. Data types supported: Same as @p input - * @param[in] perm Permutation vector - */ - void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm); - /** Set the input and output of the kernel. - * - * @note Arbitrary permutation vectors are supported with rank not greater than 4 - * - * @param[in] compile_context The compile context to be used. - * @param[in] input The input tensor to permute. Data types supported: All. - * @param[in] output The output tensor. Data types supported: Same as @p input - * @param[in] perm Permutation vector - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PermutationVector &perm); - /** Static function to check if given info will lead to a valid configuration of @ref CLPermuteKernel - * - * @note Arbitrary permutation vectors are supported with rank not greater than 4 - * - * @param[in] input First tensor input info. Data types supported: All. - * @param[in] output Output tensor info. Data types supported: same as @p input. - * @param[in] perm Permutation vector - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - PermutationVector _perm; -}; -} // arm_compute -#endif /*ARM_COMPUTE_CLPERMUTEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h deleted file mode 100644 index 6b5bd11bde..0000000000 --- a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H -#define ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ICLTensor; - -/** Interface for the pixelwise multiplication kernel. */ -class CLPixelWiseMultiplicationKernel : public ICLKernel -{ -public: - /** Default constructor.*/ - CLPixelWiseMultiplicationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPixelWiseMultiplicationKernel(const CLPixelWiseMultiplicationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPixelWiseMultiplicationKernel &operator=(const CLPixelWiseMultiplicationKernel &) = delete; - /** Allow instances of this class to be moved */ - CLPixelWiseMultiplicationKernel(CLPixelWiseMultiplicationKernel &&) = default; - /** Allow instances of this class to be moved */ - CLPixelWiseMultiplicationKernel &operator=(CLPixelWiseMultiplicationKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * Valid configurations (Input1,Input2) -> Output : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,U8) -> S16 - * - (S16,S16) -> S16 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - (QASYMM8,QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (QSYMM16,QSYMM16) -> QSYMM16 - * - (QSYMM16,QSYMM16) -> S32 - * - * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[out] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[in] scale Scale to apply after multiplication. - * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. - * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate - * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
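The scale constraint above (1/255 or 1/2^n) is the detail callers most often get wrong, so here is a sketch that checks a (U8,U8) -> U8 configuration through the static validate() declared below; the helper name and tensor shapes are illustrative assumptions.

#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"

using namespace arm_compute;

// Hypothetical helper that only checks whether the configuration would be accepted.
bool can_multiply_u8()
{
    const TensorInfo a(TensorShape(64U, 64U), 1, DataType::U8);
    const TensorInfo b(TensorShape(64U, 64U), 1, DataType::U8);
    const TensorInfo out(TensorShape(64U, 64U), 1, DataType::U8);

    // (U8,U8) -> U8 with scale 1/255 is one of the documented valid combinations.
    const Status s = CLPixelWiseMultiplicationKernel::validate(&a, &b, &out, 1.f / 255.f,
                                                               ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
    return bool(s); // an error Status converts to false and carries a description
}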
- */ - void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Initialise the kernel's input, output and border mode. - * - * Valid configurations (Input1,Input2) -> Output : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,U8) -> S16 - * - (S16,S16) -> S16 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - (QASYMM8,QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (QSYMM16,QSYMM16) -> QSYMM16 - * - (QSYMM16,QSYMM16) -> S32 - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[out] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[in] scale Scale to apply after multiplication. - * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. - * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate - * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplicationKernel - * - * Valid configurations (Input1,Input2) -> Output : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,U8) -> S16 - * - (S16,S16) -> S16 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - (QASYMM8,QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (QSYMM16,QSYMM16) -> QSYMM16 - * - (QSYMM16,QSYMM16) -> S32 - * - * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[in] scale Scale to apply after multiplication. - * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. - * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate - * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ITensorInfo *_input1; - const ITensorInfo *_input2; - ITensorInfo *_output; -}; - -/** Interface for the complex pixelwise multiplication kernel. */ -class CLComplexPixelWiseMultiplicationKernel : public ICLKernel -{ -public: - /** Default constructor.*/ - CLComplexPixelWiseMultiplicationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLComplexPixelWiseMultiplicationKernel(const CLComplexPixelWiseMultiplicationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLComplexPixelWiseMultiplicationKernel &operator=(const CLComplexPixelWiseMultiplicationKernel &) = delete; - /** Allow instances of this class to be moved */ - CLComplexPixelWiseMultiplicationKernel(CLComplexPixelWiseMultiplicationKernel &&) = default; - /** Allow instances of this class to be moved */ - CLComplexPixelWiseMultiplicationKernel &operator=(CLComplexPixelWiseMultiplicationKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2. - * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. - * @param[out] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2. - * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. - * @param[out] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref CLComplexPixelWiseMultiplicationKernel - * - * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2. - * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. - * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ITensorInfo *_input1; - const ITensorInfo *_input2; - ITensorInfo *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h deleted file mode 100644 index 85585e4587..0000000000 --- a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H -#define ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include "arm_compute/core/Error.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the pooling layer kernel */ -class CLPoolingLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLPoolingLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPoolingLayerKernel(const CLPoolingLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPoolingLayerKernel &operator=(const CLPoolingLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLPoolingLayerKernel(CLPoolingLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLPoolingLayerKernel &operator=(CLPoolingLayerKernel &&) = default; - /** Default destructor */ - ~CLPoolingLayerKernel() = default; - - /** Set the input and output tensors. - * - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32. 
- */ - void configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr); - /** Set the input and output tensors. - * - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayerKernel - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] output Destination tensor info. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -public: - const ICLTensor *_input; - ICLTensor *_output; - ICLTensor *_indices; - PoolingLayerInfo _pool_info; - DataLayout _data_layout; - BorderSize _border_size; - unsigned int _num_elems_processed_per_iteration; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h b/arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h deleted file mode 100644 index b4a69ac496..0000000000 --- a/arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H -#define ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the PriorBox layer kernel. */ -class CLPriorBoxLayerKernel : public ICLKernel -{ -public: - /** Constructor */ - CLPriorBoxLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPriorBoxLayerKernel(const CLPriorBoxLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPriorBoxLayerKernel &operator=(const CLPriorBoxLayerKernel &) = delete; - /** Default Move Constructor. */ - CLPriorBoxLayerKernel(CLPriorBoxLayerKernel &&) = default; - /** Default move assignment operator */ - CLPriorBoxLayerKernel &operator=(CLPriorBoxLayerKernel &&) = default; - /** Default destructor */ - ~CLPriorBoxLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC. - * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1 - * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data types and layouts supported: same as @p input1 - * @param[in] info Prior box layer info. - * @param[in] min Minimum prior box values - * @param[in] max Maximum prior box values - * @param[in] aspect_ratios Aspect ratio values - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max, cl::Buffer *aspect_ratios); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC. - * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1 - * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data types and layouts supported: same as @p input1 - * @param[in] info Prior box layer info. - * @param[in] min Minimum prior box values - * @param[in] max Maximum prior box values - * @param[in] aspect_ratios Aspect ratio values - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max, - cl::Buffer *aspect_ratios); - /** Static function to check if given info will lead to a valid configuration of @ref CLPriorBoxLayerKernel - * - * @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC. - * @param[in] input2 Second source tensor info. Data types and layouts supported: same as @p input1 - * @param[in] output Destination tensor info. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input1 - * @param[in] info Prior box layer info. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input1; - const ICLTensor *_input2; - ICLTensor *_output; - PriorBoxLayerInfo _info; - int _num_priors; - cl::Buffer *_min; - cl::Buffer *_max; - cl::Buffer *_aspect_ratios; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h b/arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h deleted file mode 100644 index 51c50bc011..0000000000 --- a/arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLQLSTMLAYERVNORMALIZATIONKERNEL_H -#define ARM_COMPUTE_CLQLSTMLAYERVNORMALIZATIONKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to do layer normalization. */ -class CLQLSTMLayerNormalizationKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLQLSTMLayerNormalizationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLQLSTMLayerNormalizationKernel(const CLQLSTMLayerNormalizationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLQLSTMLayerNormalizationKernel &operator=(const CLQLSTMLayerNormalizationKernel &) = delete; - /** Allow instances of this class to be moved */ - CLQLSTMLayerNormalizationKernel(CLQLSTMLayerNormalizationKernel &&) = default; - /** Allow instances of this class to be moved */ - CLQLSTMLayerNormalizationKernel &operator=(CLQLSTMLayerNormalizationKernel &&) = default; - /** Default destructor */ - ~CLQLSTMLayerNormalizationKernel() = default; - /** Initialise the kernel's input and outputs. - * - * @param[in] input Source tensor with 2 dimensions. Data types supported: QSYMM16. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] weight Weight tensor. Data types supported: Same as @p input. - * @param[in] bias Bias tensor. Data types supported: S32. 
- * - */ - void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *weight, const ICLTensor *bias); - /** Initialise the kernel's input and outputs. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor with 2 dimensions. Data types supported: QSYMM16. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] weight Weight tensor. Data types supported: Same as @p input. - * @param[in] bias Bias tensor. Data types supported: S32. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *weight, const ICLTensor *bias); - /** Static function to check if given info will lead to a valid configuration of @ref CLQLSTMLayerNormalizationKernel - * - * @param[in] input Source tensor info with 2 dimensions. Data types supported: QSYMM16. - * @param[in] output Destination info tensor. Data type supported: same as @p input - * @param[in] weight Weight info tensor. Data types supported: Same as @p input. - * @param[in] bias Bias tensor info. Data types supported: S32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_weight; - const ICLTensor *_bias; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLQLSTMLAYERVNORMALIZATIONKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h b/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h deleted file mode 100644 index b0144bf8b0..0000000000 --- a/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the quantization layer kernel. - * - * @note The implementation supports only 3D input tensors. 
- */ -class CLQuantizationLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLQuantizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLQuantizationLayerKernel(const CLQuantizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLQuantizationLayerKernel &operator=(const CLQuantizationLayerKernel &) = delete; - /** Default Move Constructor. */ - CLQuantizationLayerKernel(CLQuantizationLayerKernel &&) = default; - /** Default move assignment operator */ - CLQuantizationLayerKernel &operator=(CLQuantizationLayerKernel &&) = default; - /** Default destructor */ - ~CLQuantizationLayerKernel() = default; - /** Set the input, output. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. - * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. - * - * @note Output auto initialization is not supported by this kernel - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Set the input, output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. - * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. - * - * @note Output auto initialization is not supported by this kernel - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLQuantizationLayerKernel - * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. - * @param[in] output Destination tensor info with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h b/arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h deleted file mode 100644 index 6a0468d331..0000000000 --- a/arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLROIALIGNLAYERKERNEL_H -#define ARM_COMPUTE_CLROIALIGNLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLArray.h" -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the RoIAlign kernel. - */ -class CLROIAlignLayerKernel : public ICLKernel -{ -public: - /** Constructor */ - CLROIAlignLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLROIAlignLayerKernel(const CLROIAlignLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLROIAlignLayerKernel &operator=(const CLROIAlignLayerKernel &) = delete; - /** Default Move Constructor. */ - CLROIAlignLayerKernel(CLROIAlignLayerKernel &&) = default; - /** Default move assignment operator. */ - CLROIAlignLayerKernel &operator=(CLROIAlignLayerKernel &&) = default; - /** Default destructor */ - ~CLROIAlignLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner - * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. - * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. - * - * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled - * width and pooled height. - * @note The z dimensions of @p output tensor and @p input tensor must be the same. - * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. - */ - void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner - * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. - * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. - * - * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled - * width and pooled height. - * @note The z dimensions of @p output tensor and @p input tensor must be the same. - * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLROIAlignLayerKernel - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, - * otherwise same as @p input - * @param[in] output Destination tensor info. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. - * - * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled - * width and pooled height. - * @note The z dimensions of @p output tensor and @p input tensor must be the same. - * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. - * - * @return a Status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue); - -private: - const ICLTensor *_input; - ICLTensor *_output; - const ICLTensor *_rois; - ROIPoolingLayerInfo _pool_info; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLROIALIGNLAYERKERNEL_H*/ diff --git a/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h deleted file mode 100644 index ee422e10ee..0000000000 --- a/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H -#define ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include "arm_compute/core/CL/ICLArray.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the ROI pooling layer kernel */ -class CLROIPoolingLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLROIPoolingLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLROIPoolingLayerKernel(const CLROIPoolingLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLROIPoolingLayerKernel &operator=(const CLROIPoolingLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLROIPoolingLayerKernel(CLROIPoolingLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLROIPoolingLayerKernel &operator=(CLROIPoolingLayerKernel &&) = default; - /** Default destructor */ - ~CLROIPoolingLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: F16/F32. - * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner - * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16 - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. - * - * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled - * width and pooled height. - * @note The z dimensions of @p output tensor and @p input tensor must be the same. - * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. - */ - void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: F16/F32. - * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner - * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16 - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. - * - * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled - * width and pooled height. - * @note The z dimensions of @p output tensor and @p input tensor must be the same. - * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_rois; - ICLTensor *_output; - ROIPoolingLayerInfo _pool_info; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLRangeKernel.h b/arm_compute/core/CL/kernels/CLRangeKernel.h deleted file mode 100644 index b5c64b2480..0000000000 --- a/arm_compute/core/CL/kernels/CLRangeKernel.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLRANGEKERNEL_H -#define ARM_COMPUTE_CLRANGEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Kernel class for Range - * - * range generates a 1-D tensor containing a sequence of numbers that begins at 'start' and extends by increments - * of 'step' up to but not including 'end'. - */ -class CLRangeKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLRangeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLRangeKernel(const CLRangeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLRangeKernel &operator=(const CLRangeKernel &) = delete; - /** Allow instances of this class to be moved */ - CLRangeKernel(CLRangeKernel &&) = default; - /** Allow instances of this class to be moved */ - CLRangeKernel &operator=(CLRangeKernel &&) = default; - /** Default destructor */ - ~CLRangeKernel() = default; - /** Initialize the kernel's output tensor, start, end and step of the sequence. - * - * @param[out] output Output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. - * @param[in] start The starting value of the sequence. - * @param[in] end The ending (not including) value of the sequence. - * @param[in] step The gap between each pair of values in the sequence. - */ - void configure(ICLTensor *output, float start, float end, float step); - /** Initialize the kernel's output tensor, start, end and step of the sequence. 
- * - * @param[in] compile_context The compile context to be used. - * @param[out] output Output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. - * @param[in] start The starting value of the sequence. - * @param[in] end The ending (not including) value of the sequence. - * @param[in] step The gap between each pair of values in the sequence. - */ - void configure(const CLCompileContext &compile_context, ICLTensor *output, float start, float end, float step); - /** Static function to check if given info will lead to a valid configuration of @ref CLRangeKernel - * - * @param[in] output Output tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. - * @param[in] start The starting value of the sequence. - * @param[in] end The ending (not including) value of the sequence. - * @param[in] step The gap between each pair of values in the sequence. - * - * @return a status - */ - static Status validate(const ITensorInfo *output, float start, float end, float step); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - float _start; /**< Start of sequence */ - float _end; /**< End of sequence */ - float _step; /**< Increment/step value */ - ICLTensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLRANGEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h deleted file mode 100644 index 2ecd1c9fd4..0000000000 --- a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H -#define ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the reduction operation kernel - */ -class CLReductionOperationKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLReductionOperationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLReductionOperationKernel(const CLReductionOperationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLReductionOperationKernel &operator=(const CLReductionOperationKernel &) = delete; - /** Allow instances of this class to be moved */ - CLReductionOperationKernel(CLReductionOperationKernel &&) = default; - /** Allow instances of this class to be moved */ - CLReductionOperationKernel &operator=(CLReductionOperationKernel &&) = default; - /** Default destructor */ - ~CLReductionOperationKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. - * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 - * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX - * @param[in] width (Optional) In case of x-axis we also need to provide the width of the input image. - */ - void configure(const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width = 0); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. - * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 - * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX - * @param[in] width (Optional) In case of x-axis we also need to provide the width of the input image. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width = 0); - - /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperationKernel. - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. - * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 - * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX - * @param[in] width (Optional) In case of x-axis we also need to provide the width of the input image. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, unsigned int width = 0); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - unsigned int _reduction_axis; - ReductionOperation _op; - BorderSize _border_size; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLRemapKernel.h b/arm_compute/core/CL/kernels/CLRemapKernel.h deleted file mode 100644 index fd261cd465..0000000000 --- a/arm_compute/core/CL/kernels/CLRemapKernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLREMAPKERNEL_H -#define ARM_COMPUTE_CLREMAPKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform a remap on a tensor */ -class CLRemapKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLRemapKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLRemapKernel(const CLRemapKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLRemapKernel &operator=(const CLRemapKernel &) = delete; - /** Allow instances of this class to be moved */ - CLRemapKernel(CLRemapKernel &&) = default; - /** Allow instances of this class to be moved */ - CLRemapKernel &operator=(CLRemapKernel &&) = default; - /** Initialize the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[in] map_x Map for X coordinates. Data types supported: F32. - * @param[in] map_y Map for Y coordinates. Data types supported: F32. - * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. - * @param[in] policy The interpolation type. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined); - /** Initialize the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[in] map_x Map for X coordinates. Data types supported: F32. - * @param[in] map_y Map for Y coordinates. Data types supported: F32. - * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. - * @param[in] policy The interpolation type. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - const ICLTensor *_map_x; - const ICLTensor *_map_y; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLREMAPKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLReorgLayerKernel.h b/arm_compute/core/CL/kernels/CLReorgLayerKernel.h deleted file mode 100644 index e3edc9f724..0000000000 --- a/arm_compute/core/CL/kernels/CLReorgLayerKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLREORGLAYERKERNEL_H -#define ARM_COMPUTE_CLREORGLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform a reorg layer */ -class CLReorgLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLReorgLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLReorgLayerKernel(const CLReorgLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ - CLReorgLayerKernel &operator=(const CLReorgLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLReorgLayerKernel(CLReorgLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLReorgLayerKernel &operator=(CLReorgLayerKernel &&) = default; - /** Initialize the kernel's input, output. - * - * @param[in] input Source tensor. Data types supported: All. - * @param[out] output Destination tensor with tensor shape: - * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has - * the same number of input elements. Data types supported: same as @p input. - * @param[in] stride Stride value to use for reorganizing the values in the output tensor. - * It defines the spatial distance between 2 consecutive pixels in the x and y direction - */ - void configure(const ICLTensor *input, ICLTensor *output, int32_t stride); - /** Initialize the kernel's input, output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: All. - * @param[out] output Destination tensor with tensor shape: - * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has - * the same number of input elements. Data types supported: same as @p input. - * @param[in] stride Stride value to use for reorganizing the values in the output tensor. - * It defines the spatial distance between 2 consecutive pixels in the x and y direction - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t stride); - /** Static function to check if given info will lead to a valid configuration of @ref CLReorgLayerKernel - * - * @param[in] input Source tensor. Data types supported: All. - * @param[in] output Destination tensor with tensor shape: - * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has - * the same number of input elements. Data types supported: same as @p input. - * @param[in] stride Stride value to use for reorganizing the values in the output tensor - * It defines the spatial distance between 2 consecutive pixels in the x and y direction - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t stride); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLREORGLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h b/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h deleted file mode 100644 index 6e3f255c52..0000000000 --- a/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLRESHAPELAYERKERNEL_H -#define ARM_COMPUTE_CLRESHAPELAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to perform tensor reshaping */ -class CLReshapeLayerKernel : public ICLKernel -{ -public: - /** Set the input and output of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor info. Data type supported: All. - * @param[out] output Destination tensor info. Data type supported: Same as @p input - */ - void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output); - - /** Static function to check if given info will lead to a valid configuration of @ref CLReshapeLayerKernel - * - * @param[in] input Source tensor info. Data type supported: All - * @param[in] output Destination tensor info. Data type supported: Same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLRESHAPELAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLReverseKernel.h b/arm_compute/core/CL/kernels/CLReverseKernel.h deleted file mode 100644 index 17f1a4a20f..0000000000 --- a/arm_compute/core/CL/kernels/CLReverseKernel.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLREVERSEKERNEL_H -#define ARM_COMPUTE_CLREVERSEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the reverse kernel */ -class CLReverseKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLReverseKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLReverseKernel(const CLReverseKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLReverseKernel &operator=(const CLReverseKernel &) = delete; - /** Allow instances of this class to be moved */ - CLReverseKernel(CLReverseKernel &&) = default; - /** Allow instances of this class to be moved */ - CLReverseKernel &operator=(CLReverseKernel &&) = default; - /** Default destructor */ - ~CLReverseKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor. Data types supported: All. - * @param[out] output Output tensor. Data type supported: Same as @p input - * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 - */ - void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *axis); - /** Initialise the kernel's inputs and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[out] output Output tensor. Data type supported: Same as @p input - * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *axis); - - /** Static function to check if given info will lead to a valid configuration of @ref CLReverseKernel - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] output Output tensor info. Data type supported: Same as @p input - * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -public: - const ICLTensor *_input; - ICLTensor *_output; - const ICLTensor *_axis; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLREVERSEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLScaleKernel.h b/arm_compute/core/CL/kernels/CLScaleKernel.h deleted file mode 100644 index 79f7ed181a..0000000000 --- a/arm_compute/core/CL/kernels/CLScaleKernel.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLSCALEKERNEL_H -#define ARM_COMPUTE_CLSCALEKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" -#include "arm_compute/core/KernelDescriptors.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the scale kernel */ -class CLScaleKernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's inputs, output and interpolation policy - * - * @param[in] input Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32 - * @param[out] output Destination tensor. Data types supported: Same as @p input - * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. - * @param[in] info @ref ScaleKernelInfo Kernel descriptor to be used to configure. - */ - void configure(const ICLTensor *input, ICLTensor *output, const ScaleKernelInfo &info); - /** Initialise the kernel's inputs, output and interpolation policy - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32 - * @param[out] output Destination tensor. Data types supported: Same as @p input - * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. - * @param[in] info @ref ScaleKernelInfo Kernel descriptor to be used to configure. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ScaleKernelInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref CLScaleKernel - * - * @param[in] input Source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32 - * @param[in] output Destination tensor info. Data types supported: Same as @p input - * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. - * @param[in] info @ref ScaleKernelInfo Kernel descriptor to be used to validate - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ScaleKernelInfo &info); - /** Input tensor accessor. - * - * @return Pointer to input tensor. - */ - const ICLTensor *input() const; - /** Output tensor accessor. - * - * @return Pointer to output tensor. 
- */ - const ICLTensor *output() const; - - // Inherited methods overridden: - BorderSize border_size() const override; - void run(const Window &window, cl::CommandQueue &queue) override; - - // Getter for interpolation policy - InterpolationPolicy get_interpolation_policy() const - { - return _interpolation_policy; - } - -private: - InterpolationPolicy _interpolation_policy = InterpolationPolicy::BILINEAR; - DataLayout _data_layout = DataLayout::UNKNOWN; - bool _align_corners = false; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLSCALEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h b/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h deleted file mode 100644 index 1af56a764e..0000000000 --- a/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLSCHARR3X3KERNEL_H -#define ARM_COMPUTE_CLSCHARR3X3KERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run a 3x3 Scharr filter on a tensor. - * - * @f[ - * \mathbf{G}_x=\begin{vmatrix} - * -3 & 0 & +3\\ - * -10& 0 & +10\\ - * -3 & 0 & +3 - * \end{vmatrix} - * @f] - * @f[ - * \mathbf{G}_y=\begin{vmatrix} - * -3 & -10 & -3\\ - * 0 & 0 & 0\\ - * +3 & +10 & +3 - * \end{vmatrix} - * @f] - */ -class CLScharr3x3Kernel : public ICLKernel -{ -public: - /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ - CLScharr3x3Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLScharr3x3Kernel(const CLScharr3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLScharr3x3Kernel &operator=(const CLScharr3x3Kernel &) = delete; - /** Allow instances of this class to be moved */ - CLScharr3x3Kernel(CLScharr3x3Kernel &&) = default; - /** Allow instances of this class to be moved */ - CLScharr3x3Kernel &operator=(CLScharr3x3Kernel &&) = default; - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. 
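Similarly, the CLScaleKernel interface above is driven through a ScaleKernelInfo descriptor. A hedged sketch, not part of the patch: only the configure() signature is taken from the removed header, and the two-argument ScaleKernelInfo constructor shown here is an assumption about the descriptor in KernelDescriptors.h.

    #include "arm_compute/core/CL/kernels/CLScaleKernel.h"
    #include "arm_compute/core/KernelDescriptors.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    void example_scale()
    {
        using namespace arm_compute;
        CLScheduler::get().default_init();

        CLTensor src{}, dst{};
        src.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));
        dst.allocator()->init(TensorInfo(TensorShape(320U, 240U), 1, DataType::U8)); // only the X/Y dimensions may change

        // Assumed argument order: interpolation policy, then border mode (check KernelDescriptors.h)
        const ScaleKernelInfo info(InterpolationPolicy::BILINEAR, BorderMode::REPLICATE);

        CLScaleKernel scale{};
        scale.configure(&src, &dst, info);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        CLScheduler::get().enqueue(scale);
    }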
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - bool _run_scharr_x; /**< Do we need to run Scharr X ? */ - bool _run_scharr_y; /**< Do we need to run Scharr Y ? */ - const ICLTensor *_input; /**< Input image */ - ICLTensor *_output_x; /**< Output image for scharr X */ - ICLTensor *_output_y; /**< Output image for scharr Y */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLSCHARR3X3KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLSelectKernel.h b/arm_compute/core/CL/kernels/CLSelectKernel.h deleted file mode 100644 index 4015a273ea..0000000000 --- a/arm_compute/core/CL/kernels/CLSelectKernel.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLSELECTKERNEL_H -#define ARM_COMPUTE_CLSELECTKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ICLTensor; - -/** OpenCL interface for executing the select kernel - * - * Select is computed by: - * @f[ output(i) = condition(i) ? 
x(i) : y(i) @f] - **/ -class CLSelectKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLSelectKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSelectKernel(const CLSelectKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSelectKernel &operator=(const CLSelectKernel &) = delete; - /** Allow instances of this class to be moved */ - CLSelectKernel(CLSelectKernel &&) = default; - /** Allow instances of this class to be moved */ - CLSelectKernel &operator=(CLSelectKernel &&) = default; - /** Default destructor */ - ~CLSelectKernel() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: All. - * @param[out] y Second input tensor. Data types supported: Same as @p x - * @param[in] output Output tensor. Data types supported: Same as @p x. - */ - void configure(const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output); - /** Initialise the kernel's inputs and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: All. - * @param[out] y Second input tensor. Data types supported: Same as @p x - * @param[in] output Output tensor. Data types supported: Same as @p x. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLSelectKernel - * - * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: All. - * @param[in] y Second input tensor. Data types supported: Same as @p x - * @param[in] output Output tensor. Data types supported: Same as @p x. - * - * @return a status - */ - static Status validate(const ITensorInfo *c, const ITensorInfo *x, const ITensorInfo *y, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_c; /**< Condition tensor */ - const ICLTensor *_x; /**< Source tensor 1 */ - const ICLTensor *_y; /**< Source tensor 2 */ - ICLTensor *_output; /**< Destination tensor */ - bool _has_same_rank; /**< Flag that indicates if condition tensor and other inputs have the same rank */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLWHEREKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h b/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h deleted file mode 100644 index e24767852e..0000000000 --- a/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
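The CLSelectKernel above computes output(i) = condition(i) ? x(i) : y(i); note that y is passed as a const input even though the removed header tags it @param[out]. A minimal validate()-level sketch (not part of the patch; shapes are arbitrary):

    #include "arm_compute/core/CL/kernels/CLSelectKernel.h"
    #include "arm_compute/core/TensorInfo.h"

    void example_select_validate()
    {
        using namespace arm_compute;

        // The condition is U8; x, y and output share data type and shape
        const TensorInfo c(TensorShape(16U, 16U), 1, DataType::U8);
        const TensorInfo x(TensorShape(16U, 16U), 1, DataType::F32);
        const TensorInfo y(TensorShape(16U, 16U), 1, DataType::F32);
        const TensorInfo out(TensorShape(16U, 16U), 1, DataType::F32);

        const Status status = CLSelectKernel::validate(&c, &x, &y, &out);
        if(status.error_code() != ErrorCode::OK)
        {
            // configuration would be rejected; status.error_description() explains why
        }
    }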
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLSOBEL3X3KERNEL_H -#define ARM_COMPUTE_CLSOBEL3X3KERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run a 3x3 Sobel filter on a tensor. */ -class CLSobel3x3Kernel : public ICLKernel -{ -public: - /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ - CLSobel3x3Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel3x3Kernel(const CLSobel3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel3x3Kernel &operator=(const CLSobel3x3Kernel &) = delete; - /** Allow instances of this class to be moved */ - CLSobel3x3Kernel(CLSobel3x3Kernel &&) = default; - /** Allow instances of this class to be moved */ - CLSobel3x3Kernel &operator=(CLSobel3x3Kernel &&) = default; - /** Default destructor */ - ~CLSobel3x3Kernel() = default; - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input; /**< Input tensor */ - ICLTensor *_output_x; /**< Output tensor for Sobel X */ - ICLTensor *_output_y; /**< Output tensor for Sobel Y */ - bool _run_sobel_x; /**< Do we need to run Sobel X ? */ - bool _run_sobel_y; /**< Do we need to run Sobel Y ? */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLSOBEL3X3KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h b/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h deleted file mode 100644 index 82831ed14d..0000000000 --- a/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLSOBEL5X5KERNEL_H -#define ARM_COMPUTE_CLSOBEL5X5KERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor. */ -class CLSobel5x5HorKernel : public ICLKernel -{ -public: - /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ - CLSobel5x5HorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel5x5HorKernel(const CLSobel5x5HorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel5x5HorKernel &operator=(const CLSobel5x5HorKernel &) = delete; - /** Allow instances of this class to be moved */ - CLSobel5x5HorKernel(CLSobel5x5HorKernel &&) = default; - /** Allow instances of this class to be moved */ - CLSobel5x5HorKernel &operator=(CLSobel5x5HorKernel &&) = default; - /** Default destructor */ - ~CLSobel5x5HorKernel() = default; - - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. 
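For the CLSobel3x3Kernel removed above: it reads a U8 source and writes optional S16 X/Y gradient planes in a single pass. A brief sketch, not part of the patch; shapes and the helper name are illustrative.

    #include "arm_compute/core/CL/kernels/CLSobel3x3Kernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    void example_sobel3x3()
    {
        using namespace arm_compute;
        CLScheduler::get().default_init();

        CLTensor src{}, gx{}, gy{};
        src.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));
        gx.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::S16));
        gy.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::S16));

        CLSobel3x3Kernel sobel{};
        // border_undefined = true: border pixels are not computed, so the valid region shrinks by border_size()
        sobel.configure(&src, &gx, &gy, true);

        src.allocator()->allocate();
        gx.allocator()->allocate();
        gy.allocator()->allocate();
        CLScheduler::get().enqueue(sobel);
    }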
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input; /**< Input tensor */ - ICLTensor *_output_x; /**< X output of horizontal pass */ - ICLTensor *_output_y; /**< Y output of horizontal pass */ - bool _run_sobel_x; /**< Do we need to run Sobel X ? */ - bool _run_sobel_y; /**< Do we need to run Sobel Y ? */ - BorderSize _border_size; /**< Border size */ -}; - -/** Interface for the kernel to run the vertical pass of 5x5 Sobel filter on a tensor. */ -class CLSobel5x5VertKernel : public ICLKernel -{ -public: - /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ - CLSobel5x5VertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel5x5VertKernel(const CLSobel5x5VertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel5x5VertKernel &operator=(const CLSobel5x5VertKernel &) = delete; - /** Allow instances of this class to be moved */ - CLSobel5x5VertKernel(CLSobel5x5VertKernel &&) = default; - /** Allow instances of this class to be moved */ - CLSobel5x5VertKernel &operator=(CLSobel5x5VertKernel &&) = default; - /** Default destructor */ - ~CLSobel5x5VertKernel() = default; - - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set and the corresponding input. - * - * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16. - * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set and the corresponding input. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16. - * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). 
Data types supported: S16. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */ - const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */ - ICLTensor *_output_x; /**< X output of sobel */ - ICLTensor *_output_y; /**< Y output of sobel */ - bool _run_sobel_x; /**< Do we need to run sobel X? */ - bool _run_sobel_y; /**< Do we need to run sobel Y? */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLSOBEL5X5KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h b/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h deleted file mode 100644 index d55993d1f0..0000000000 --- a/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLSOBEL7X7KERNEL_H -#define ARM_COMPUTE_CLSOBEL7X7KERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor. */ -class CLSobel7x7HorKernel : public ICLKernel -{ -public: - /** Default constructor: initialize all the pointers to nullptr and parameters to zero. 
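The 5x5 Sobel above is split into a horizontal and a vertical pass that communicate through intermediate S16 tensors, as documented in the Vert kernel's parameters. A sketch of the chaining (not part of the patch; giving the intermediates the same shape as the input is an assumption about how the passes are normally wired):

    #include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    void example_sobel5x5()
    {
        using namespace arm_compute;
        CLScheduler::get().default_init();

        CLTensor src{}, tmp_x{}, tmp_y{}, gx{}, gy{};
        const TensorShape shape(640U, 480U);
        src.allocator()->init(TensorInfo(shape, 1, DataType::U8));
        for(CLTensor *t : { &tmp_x, &tmp_y, &gx, &gy })
        {
            t->allocator()->init(TensorInfo(shape, 1, DataType::S16));
        }

        CLSobel5x5HorKernel  hor{};
        CLSobel5x5VertKernel vert{};
        hor.configure(&src, &tmp_x, &tmp_y, false);      // horizontal pass writes the intermediates
        vert.configure(&tmp_x, &tmp_y, &gx, &gy, false); // vertical pass consumes them

        for(CLTensor *t : { &src, &tmp_x, &tmp_y, &gx, &gy })
        {
            t->allocator()->allocate();
        }
        CLScheduler::get().enqueue(hor, false); // queue both passes, flush once at the end
        CLScheduler::get().enqueue(vert);
    }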
*/ - CLSobel7x7HorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel7x7HorKernel(const CLSobel7x7HorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel7x7HorKernel &operator=(const CLSobel7x7HorKernel &) = delete; - /** Allow instances of this class to be moved */ - CLSobel7x7HorKernel(CLSobel7x7HorKernel &&) = default; - /** Allow instances of this class to be moved */ - CLSobel7x7HorKernel &operator=(CLSobel7x7HorKernel &&) = default; - /** Default destructor */ - ~CLSobel7x7HorKernel() = default; - - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input; /**< Input tensor */ - ICLTensor *_output_x; /**< X output of horizontal pass */ - ICLTensor *_output_y; /**< Y output of horizontal pass */ - bool _run_sobel_x; /**< Do we need to run Sobel X ? */ - bool _run_sobel_y; /**< Do we need to run Sobel Y ? */ - BorderSize _border_size; /**< Border size */ -}; - -/** Interface for the kernel to run the vertical pass of 7x7 Sobel filter on a tensor. */ -class CLSobel7x7VertKernel : public ICLKernel -{ -public: - /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ - CLSobel7x7VertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel7x7VertKernel(const CLSobel7x7VertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel7x7VertKernel &operator=(const CLSobel7x7VertKernel &) = delete; - /** Allow instances of this class to be moved */ - CLSobel7x7VertKernel(CLSobel7x7VertKernel &&) = default; - /** Allow instances of this class to be moved */ - CLSobel7x7VertKernel &operator=(CLSobel7x7VertKernel &&) = default; - /** Default destructor */ - ~CLSobel7x7VertKernel() = default; - - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set and the corresponding input. 
- * - * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32. - * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set and the corresponding input. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32. - * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */ - const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */ - ICLTensor *_output_x; /**< X output of sobel */ - ICLTensor *_output_y; /**< Y output of sobel */ - bool _run_sobel_x; /**< Do we need to run sobel X? */ - bool _run_sobel_y; /**< Do we need to run sobel Y? */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLSOBEL7X7KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h deleted file mode 100644 index f8c1019d53..0000000000 --- a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H -#define ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLSimple3DKernel.h" -#include "arm_compute/core/KernelDescriptors.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for max, shifting, exponentiating and summing the logits */ -class CLLogits1DMaxShiftExpSumKernel : public ICLKernel -{ -public: - /** Info for whether a parallel reduction will be run and the vector size of the execution. */ - using ParallelReductionInfo = std::tuple<bool, unsigned int>; - -public: - /** Default constructor */ - CLLogits1DMaxShiftExpSumKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLogits1DMaxShiftExpSumKernel(const CLLogits1DMaxShiftExpSumKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLogits1DMaxShiftExpSumKernel &operator=(const CLLogits1DMaxShiftExpSumKernel &) = delete; - /** Allow instances of this class to be moved */ - CLLogits1DMaxShiftExpSumKernel(CLLogits1DMaxShiftExpSumKernel &&) = default; - /** Allow instances of this class to be moved */ - CLLogits1DMaxShiftExpSumKernel &operator=(CLLogits1DMaxShiftExpSumKernel &&) = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in,out] max Max values tensor. Data types supported: same as @p input - * @param[out] output Destination tensor. Data types supported: same as @p input - * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input - * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. - */ - void configure(const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in,out] max Max values tensor. Data types supported: same as @p input - * @param[out] output Destination tensor. Data types supported: same as @p input - * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input - * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxShiftExpSumKernel - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] max Max values tensor. Data types supported: same as @p input - * @param[in] output Destination tensor. Data types supported: same as @p input - * @param[in] sum Sum of 1D logits tensor.
Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum); - /** Checks if the given size is eligible for parallel reduction - * - * @note Serial reduction is launched for width < (_grid_size * _serial_vector_size). - * @note Parallel reduction is launched for width >= (_grid_size * _serial_vector_size) and vector_size is forced to 4. - * - * @param[in] size Size to check - * - * @return A two-element tuple where the first element is a boolean specifying if a parallel reduction will be run, - * while the second element is the vector size of the execution. - */ - static ParallelReductionInfo is_parallel_reduction(size_t size); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_max; - ICLTensor *_output; - ICLTensor *_sum; - -private: - static const unsigned int _grid_size; - static const unsigned int _serial_vector_size; - static const unsigned int _parallel_vector_size; -}; -/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */ -class CLLogits1DNormKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLLogits1DNormKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLogits1DNormKernel(const CLLogits1DNormKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLogits1DNormKernel &operator=(const CLLogits1DNormKernel &) = delete; - /** Allow instances of this class to be moved */ - CLLogits1DNormKernel(CLLogits1DNormKernel &&) = default; - /** Allow instances of this class to be moved */ - CLLogits1DNormKernel &operator=(CLLogits1DNormKernel &&) = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported. - * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input - * @param[out] output Destination tensor. Data types supported: QASYMM8/QASYMM8_SIGNED for S32 @p input, or same as @p input - * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. - */ - void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported. - * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input - * @param[out] output Destination tensor. Data types supported: QASYMM8/QASYMM8_SIGNED for S32 @p input, or same as @p input - * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DNormKernel - * - * @param[in] input Source tensor. Data types supported: S32/F16/F32. 
If this kernel is used for log softmax, only F32/F16 is supported. - * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input - * @param[in] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input - * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, const SoftmaxKernelInfo &info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_sum; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h b/arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h deleted file mode 100644 index 93221f7b5a..0000000000 --- a/arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H -#define ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the space to batch kernel */ -class CLSpaceToBatchLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLSpaceToBatchLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSpaceToBatchLayerKernel(const CLSpaceToBatchLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSpaceToBatchLayerKernel &operator=(const CLSpaceToBatchLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLSpaceToBatchLayerKernel(CLSpaceToBatchLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLSpaceToBatchLayerKernel &operator=(CLSpaceToBatchLayerKernel &&) = default; - /** Default destructor */ - ~CLSpaceToBatchLayerKernel() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. 
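The two kernels above form a pipeline: CLLogits1DMaxShiftExpSumKernel produces the shifted exponentials together with the per-row max and sum, and CLLogits1DNormKernel divides by that sum. A sketch of wiring them together (not part of the patch; the default-constructed SoftmaxKernelInfo and the [1, rows] shapes for max and sum are assumptions, only the configure() signatures come from the removed header):

    #include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
    #include "arm_compute/core/KernelDescriptors.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    #include <tuple>

    void example_softmax_kernels()
    {
        using namespace arm_compute;
        CLScheduler::get().default_init();

        CLTensor input{}, max{}, tmp{}, sum{}, output{};
        input.allocator()->init(TensorInfo(TensorShape(128U, 16U), 1, DataType::F32));
        tmp.allocator()->init(TensorInfo(TensorShape(128U, 16U), 1, DataType::F32));    // shifted exponentials
        output.allocator()->init(TensorInfo(TensorShape(128U, 16U), 1, DataType::F32));
        max.allocator()->init(TensorInfo(TensorShape(1U, 16U), 1, DataType::F32));      // per-row maximum
        sum.allocator()->init(TensorInfo(TensorShape(1U, 16U), 1, DataType::F32));      // per-row sum of exponentials

        const SoftmaxKernelInfo info{}; // default-constructed descriptor; beta and the log-softmax flag live in KernelDescriptors.h

        CLLogits1DMaxShiftExpSumKernel max_shift_exp_sum{};
        CLLogits1DNormKernel           norm{};
        max_shift_exp_sum.configure(&input, &max, &tmp, &sum, info);
        norm.configure(&tmp, &sum, &output, info);

        // First tuple element tells whether the parallel reduction path is taken for this row width
        const bool parallel = std::get<0>(CLLogits1DMaxShiftExpSumKernel::is_parallel_reduction(128));
        (void)parallel;

        for(CLTensor *t : { &input, &max, &tmp, &sum, &output })
        {
            t->allocator()->allocate();
        }
        CLScheduler::get().enqueue(max_shift_exp_sum, false);
        CLScheduler::get().enqueue(norm);
    }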
Data types supported: S32 - * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output); - /** Initialise the kernel's inputs and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output); - /** Initialise the kernel's input and output. (Static block shape and paddings) - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[in] padding_left The left padding of the output tensor. - * @param[in] padding_right The right padding of the output tensor. - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output); - /** Initialise the kernel's input and output. (Static block shape and paddings) - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[in] padding_left The left padding of the output tensor. - * @param[in] padding_right The right padding of the output tensor. - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayerKernel - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 - * @param[in] output Tensor output. Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayerKernel (Static block shape and paddings) - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[in] padding_left The left padding of the output tensor. - * @param[in] padding_right The right padding of the output tensor. - * @param[in] output Tensor output. 
Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - const ICLTensor *_block_shape; /**< Block shape tensor */ - const ICLTensor *_paddings; /**< Paddings tensor */ - ICLTensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h b/arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h deleted file mode 100644 index af0aa12598..0000000000 --- a/arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H -#define ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the space to depth kernel */ -class CLSpaceToDepthLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLSpaceToDepthLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSpaceToDepthLayerKernel(const CLSpaceToDepthLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSpaceToDepthLayerKernel &operator=(const CLSpaceToDepthLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLSpaceToDepthLayerKernel(CLSpaceToDepthLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLSpaceToDepthLayerKernel &operator=(CLSpaceToDepthLayerKernel &&) = default; - /** Default destructor */ - ~CLSpaceToDepthLayerKernel() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[out] output Tensor output. Data types supported: same as @p input - * @param[in] block_shape Block shape value. 
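For the CLSpaceToBatchLayerKernel above, the static-shape overload can be checked without allocating anything: with a 2x2 block and no padding, a [W, H, C, N] input maps to [W/2, H/2, C, 4N]. A small validate() sketch (not part of the patch):

    #include "arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h"
    #include "arm_compute/core/TensorInfo.h"

    void example_space_to_batch_validate()
    {
        using namespace arm_compute;

        const TensorInfo input(TensorShape(8U, 8U, 3U, 1U), 1, DataType::F32);
        const TensorInfo output(TensorShape(4U, 4U, 3U, 4U), 1, DataType::F32); // spatial blocks folded into the batch

        const Status status = CLSpaceToBatchLayerKernel::validate(&input, 2, 2, Size2D(0, 0), Size2D(0, 0), &output);
        // status.error_code() == ErrorCode::OK when the shapes, block sizes and paddings are consistent
        (void)status;
    }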
- */ - void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape); - /** Initialise the kernel's inputs and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[out] output Tensor output. Data types supported: same as @p input - * @param[in] block_shape Block shape value. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape); - /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToDepthLayerKernel. - * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. - * @param[in] output Tensor output info. Data types supported: same as @p input - * @param[in] block_shape Block shape value. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - ICLTensor *_output; /**< Destination tensor */ - int32_t _block_shape; /**< Block shape */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLStackLayerKernel.h b/arm_compute/core/CL/kernels/CLStackLayerKernel.h deleted file mode 100644 index cfefcd97dd..0000000000 --- a/arm_compute/core/CL/kernels/CLStackLayerKernel.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
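CLSpaceToDepthLayerKernel above is the depth-wise counterpart: a block shape of 2 moves each 2x2 spatial patch into the channel dimension, so [W, H, C, N] becomes [W/2, H/2, 4C, N]. A matching validate() sketch (not part of the patch):

    #include "arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h"
    #include "arm_compute/core/TensorInfo.h"

    void example_space_to_depth_validate()
    {
        using namespace arm_compute;

        const TensorInfo input(TensorShape(8U, 8U, 3U, 1U), 1, DataType::F32);
        const TensorInfo output(TensorShape(4U, 4U, 12U, 1U), 1, DataType::F32); // channels grow by block_shape^2

        const Status status = CLSpaceToDepthLayerKernel::validate(&input, &output, 2);
        (void)status;
    }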
- */ - -#ifndef ARM_COMPUTE_CLSTACKLAYERKERNEL_H -#define ARM_COMPUTE_CLSTACKLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to stacks a rank-R tensor into one with rank-(R+1) along the axis dimension.*/ -class CLStackLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLStackLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLStackLayerKernel(const CLStackLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLStackLayerKernel &operator=(const CLStackLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLStackLayerKernel(CLStackLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLStackLayerKernel &operator=(CLStackLayerKernel &&) = default; - /** Default destructor */ - ~CLStackLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @note Supported input tensor rank: up to 4 - * - * @param[in] input Input tensor. Data types supported: All. - * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. - * @param[in] idx_input Index of the input tensor in the list of tensors to stack. - * All tensors in the list must have the same shape - * @param[in] num_tensors Number of tensors to stack - * @param[out] output Output tensor. Data types supported: Same as @p input. - * - */ - void configure(const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output); - /** Initialise the kernel's inputs and output - * - * @note Supported input tensor rank: up to 4 - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. - * @param[in] idx_input Index of the input tensor in the list of tensors to stack. - * All tensors in the list must have the same shape - * @param[in] num_tensors Number of tensors to stack - * @param[out] output Output tensor. Data types supported: Same as @p input. - * - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLStackLayerKernel - * - * @note Supported input tensor rank: up to 4 - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. - * @param[in] idx_input Index of the input tensor in the list of tensors to stack - * All tensors in the list must have the same shape - * @param[in] num_tensors Number of tensors to stack - * @param[in] output Output tensor info. Data types supported: Same as @p input. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLSTACKLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLStridedSliceKernel.h b/arm_compute/core/CL/kernels/CLStridedSliceKernel.h deleted file mode 100644 index 74311b71fa..0000000000 --- a/arm_compute/core/CL/kernels/CLStridedSliceKernel.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H -#define ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -#include - -namespace arm_compute -{ -/** Interface for the kernel to perform tensor strided slicing */ -class CLStridedSliceKernel : public ICLKernel -{ -public: - /** Configure kernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor info. Data type supported: All. - * @param[out] output Destination tensor info. Data type supported: Same as @p input - * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. - * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. - * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. - * A slice of size 1 starting from starts[i] in the dimension must be preserved. 
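CLStackLayerKernel above writes one input slice per kernel instance, so stacking N tensors uses N configured kernels that share the same output. A sketch of that loop (not part of the patch; shapes and names are illustrative):

    #include "arm_compute/core/CL/kernels/CLStackLayerKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    #include <array>

    void example_stack()
    {
        using namespace arm_compute;
        CLScheduler::get().default_init();

        constexpr unsigned int num_tensors = 3;
        constexpr unsigned int axis        = 2; // new dimension inserted at index 2

        std::array<CLTensor, num_tensors>           inputs{};
        std::array<CLStackLayerKernel, num_tensors> kernels{};
        CLTensor                                    output{};

        for(auto &in : inputs)
        {
            in.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
        }
        // Rank grows by one: three [16, 16] inputs stacked along axis 2 give a [16, 16, 3] output
        output.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::F32));

        for(unsigned int i = 0; i < num_tensors; ++i)
        {
            kernels[i].configure(&inputs[i], axis, i, num_tensors, &output);
        }

        for(auto &in : inputs)
        {
            in.allocator()->allocate();
        }
        output.allocator()->allocate();

        for(unsigned int i = 0; i < num_tensors; ++i)
        {
            CLScheduler::get().enqueue(kernels[i], i == num_tensors - 1); // flush only on the last kernel
        }
    }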
- */ - void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); - - /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] input Source tensor. Data type supported: All. - * @param[in] output Destination tensor. Data type supported: Same as @p input - * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. - * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. - * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. - * A slice of size 1 starting from starts[i] in the dimension must be preserved. - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLTableLookupKernel.h b/arm_compute/core/CL/kernels/CLTableLookupKernel.h deleted file mode 100644 index 9f1d28c47a..0000000000 --- a/arm_compute/core/CL/kernels/CLTableLookupKernel.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
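Unlike the older kernels in this patch, CLStridedSliceKernel above is configured from ITensorInfo objects and executed through run_op() with an ITensorPack, so a validate()-level check is the simplest illustration (not part of the patch; the coordinates are arbitrary):

    #include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h"
    #include "arm_compute/core/TensorInfo.h"

    void example_strided_slice_validate()
    {
        using namespace arm_compute;

        const TensorInfo input(TensorShape(8U, 4U), 1, DataType::F32);
        const TensorInfo output(TensorShape(4U, 2U), 1, DataType::F32); // first four elements along X, first two along Y

        const Coordinates starts(0, 0);
        const Coordinates ends(4, 2);
        const BiStrides   strides(1, 1);

        // No begin/end/shrink-axis mask bits set
        const Status status = CLStridedSliceKernel::validate(&input, &output, starts, ends, strides, 0, 0, 0);
        (void)status;
    }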
- */ -#ifndef ARM_COMPUTE_CLTABLELOOKUPKERNEL_H -#define ARM_COMPUTE_CLTABLELOOKUPKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; -class ICLLut; - -/** Interface for the kernel to perform table lookup calculations. */ -class CLTableLookupKernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's input, lut and output. - * - * @param[in] input An input tensor. Data types supported: U8, S16. - * @param[in] lut The input LUT. Data types supported: U8, S16. - * @param[out] output The output tensor. Data types supported: U8, S16. - */ - void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output); - /** Initialise the kernel's input, lut and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. Data types supported: U8, S16. - * @param[in] lut The input LUT. Data types supported: U8, S16. - * @param[out] output The output tensor. Data types supported: U8, S16. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLLut *lut, ICLTensor *output); -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLTABLELOOKUPKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLThresholdKernel.h b/arm_compute/core/CL/kernels/CLThresholdKernel.h deleted file mode 100644 index 7e01fd6aaa..0000000000 --- a/arm_compute/core/CL/kernels/CLThresholdKernel.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLTHRESHOLDKERNEL_H -#define ARM_COMPUTE_CLTHRESHOLDKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ICLTensor; - -/** Interface for the thresholding kernel. */ -class CLThresholdKernel : public ICLSimple2DKernel -{ -public: - /**Initialise the kernel's input, output and threshold parameters. - * - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] info Threshold descriptor - */ - void configure(const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info); - /**Initialise the kernel's input, output and threshold parameters. - * - * @param[in] compile_context The compile context to be used. 
- * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] info Threshold descriptor - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NETHRESHOLDKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLTileKernel.h b/arm_compute/core/CL/kernels/CLTileKernel.h deleted file mode 100644 index 56e1df8de3..0000000000 --- a/arm_compute/core/CL/kernels/CLTileKernel.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLTILEKERNEL_H -#define ARM_COMPUTE_CLTILEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform a Tile operation */ -class CLTileKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLTileKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLTileKernel(const CLTileKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLTileKernel &operator=(const CLTileKernel &) = delete; - /** Allow instances of this class to be moved */ - CLTileKernel(CLTileKernel &&) = default; - /** Allow instances of this class to be moved */ - CLTileKernel &operator=(CLTileKernel &&) = default; - /** Default destructor */ - ~CLTileKernel() = default; - /** Set the source, destination of the kernel - * - * @param[in] input Source tensor. Data type supported: All. - * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. - * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported). - * @param[out] output Destination tensor. Same as @p input - * - */ - void configure(const ICLTensor *input, ICLTensor *output, const Multiples &multiples); - /** Set the source, destination of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data type supported: All. - * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. - * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported). 
- * @param[out] output Destination tensor. Same as @p input - * - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Multiples &multiples); - /** Static function to check if given info will lead to a valid configuration of @ref CLTileKernel - * - * @param[in] input Source tensor info. Data type supported: All. - * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. - * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported). - * @param[in] output Destination tensor info. Same as @p input - * - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Multiples &multiples); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLTILEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLTransposeKernel.h b/arm_compute/core/CL/kernels/CLTransposeKernel.h deleted file mode 100644 index 4a9887f2cf..0000000000 --- a/arm_compute/core/CL/kernels/CLTransposeKernel.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLTRANSPOSEKERNEL_H -#define ARM_COMPUTE_CLTRANSPOSEKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel which transposes the elements of a matrix. - * - * [width, height, batch] -> [height, width, batch] - * - */ -class CLTransposeKernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: All. - * @param[out] output Output tensor. Data type supported: Same as @p input - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[out] output Output tensor. 
Data type supported: Same as @p input - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLTransposeKernel - * - * @param[in] input Input tensor. Data types supported: All. - * @param[in] output Output tensor. Data type supported: Same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLTRANSPOSEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h b/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h deleted file mode 100644 index b523b97233..0000000000 --- a/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H -#define ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the UpsampleLayer kernel on OpenCL. */ -class CLUpsampleLayerKernel : public ICLKernel -{ -public: - /** Constructor */ - CLUpsampleLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLUpsampleLayerKernel(const CLUpsampleLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLUpsampleLayerKernel &operator=(const CLUpsampleLayerKernel &) = delete; - /** Default Move Constructor. */ - CLUpsampleLayerKernel(CLUpsampleLayerKernel &&) = default; - /** Default move assignment operator */ - CLUpsampleLayerKernel &operator=(CLUpsampleLayerKernel &&) = default; - /** Default destructor */ - ~CLUpsampleLayerKernel() = default; - - /** Initialise the kernel's input and output. - * - * @param[in] input Source tensor. Data types supported: All. - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] info Contains stride information described in @ref Size2D. - * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. - */ - void configure(const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy); - /** Initialise the kernel's input and output. 
- * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: All. - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] info Contains stride information described in @ref Size2D. - * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy); - /** Static function to check if given info will lead to a valid configuration of @ref CLUpsampleLayerKernel - * - * @param[in] input Source tensor info. Data types supported: All. - * @param[in] output Destination tensor info. Data types supported: same as @p input. - * @param[in] info Contains stride information described in @ref Size2D. - * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info, const InterpolationPolicy upsampling_policy); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - Size2D _info; - DataLayout _data_layout; - unsigned int _num_elems_processed_per_iteration_input_x; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWarpAffineKernel.h b/arm_compute/core/CL/kernels/CLWarpAffineKernel.h deleted file mode 100644 index 440febab96..0000000000 --- a/arm_compute/core/CL/kernels/CLWarpAffineKernel.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLWARPAFFINEKERNEL_H -#define ARM_COMPUTE_CLWARPAFFINEKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the warp affine kernel.*/ -class CLWarpAffineKernel : public ICLSimple2DKernel -{ -public: - /** Initialize the function's source, destination, interpolation policy and border_mode. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8. - * @param[in] matrix The perspective matrix. 
Must be 2x3 of type float - * The matrix argument requires 9 values, the last 3 values are ignored. - * @param[in] policy The interpolation type. - */ - void configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy); - /** Initialize the function's source, destination, interpolation policy and border_mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8. - * @param[in] matrix The perspective matrix. Must be 2x3 of type float - * The matrix argument requires 9 values, the last 3 values are ignored. - * @param[in] policy The interpolation type. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLWARPAFFINEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h b/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h deleted file mode 100644 index 6614989059..0000000000 --- a/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H -#define ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; -/** Interface for the warp perspective kernel.*/ -class CLWarpPerspectiveKernel : public ICLSimple2DKernel -{ -public: - /** Initialize the function's source, destination, interpolation policy and border_mode. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8. - * @param[in] matrix The perspective matrix. Must be 3x3 of type float. - * @param[in] policy The interpolation type. - */ - void configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy); - /** Initialize the function's source, destination, interpolation policy and border_mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8. - * @param[in] matrix The perspective matrix. Must be 3x3 of type float. - * @param[in] policy The interpolation type. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h deleted file mode 100644 index c74255bac0..0000000000 --- a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H -#define ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -/** OpenCL kernel to perform reshaping on the weights used by convolution and locally connected layer - * - * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels. - * In combination with the @ref CLIm2ColKernel can transform a convolution to a matrix multiplication.
- * - * For example assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have: - * @f[ - * \left( \begin{array}{ccc} - * a000 & a001 & a002 \\ - * a010 & a011 & a012 \\ - * a020 & a021 & a022 \\ - * \end{array} \right) - * \left( \begin{array}{ccc} - * a100 & a101 & a102 \\ - * a110 & a111 & a112 \\ - * a120 & a121 & a122 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccccccccc} - * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\ - * \end{array} \right) - * @f] - */ -class CLWeightsReshapeKernel : public ICLKernel -{ -public: - /** Constructor.*/ - CLWeightsReshapeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWeightsReshapeKernel(const CLWeightsReshapeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWeightsReshapeKernel &operator=(const CLWeightsReshapeKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWeightsReshapeKernel(CLWeightsReshapeKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWeightsReshapeKernel &operator=(CLWeightsReshapeKernel &&) = default; - /** Default destructor */ - ~CLWeightsReshapeKernel() = default; - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, - * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All - * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with - * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr. - * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. - * @param[out] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise. - * Data types supported: Same as @p input - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout - * Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it. - */ - void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1); - /** Set the input and output of the kernel. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, - * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All - * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with - * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr. - * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. - * @param[out] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise. - * Data types supported: Same as @p input - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. 
num_groups != 1 is only supported for NCHW data layout - * Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1); - /** Static function to check if given info will lead to a valid configuration of @ref CLWeightsReshapeKernel - * - * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, - * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All - * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with - * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr. - * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. - * @param[in] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise. - * Data types supported: Same as @p input - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout - * Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups = 1); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_biases; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H */ \ No newline at end of file diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h deleted file mode 100644 index a379b5f0b8..0000000000 --- a/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H -#define ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -/** Interface for the width concatenate kernel of 2 tensors. - * The input1 and input2 tensors will be concatenated into the output tensor. - */ -class CLWidthConcatenate2TensorsKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLWidthConcatenate2TensorsKernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenate2TensorsKernel(const CLWidthConcatenate2TensorsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenate2TensorsKernel &operator=(const CLWidthConcatenate2TensorsKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWidthConcatenate2TensorsKernel(CLWidthConcatenate2TensorsKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWidthConcatenate2TensorsKernel &operator=(CLWidthConcatenate2TensorsKernel &&) = default; - /** Default destructor */ - ~CLWidthConcatenate2TensorsKernel() = default; - /** Initialise the kernel's input1s and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 First input tensor. Data types supported: All. - * @param[in] input2 Second input tensor. Data types supported: same as @p input1 - * @param[out] output Output tensor. Data types supported: Same as @p input1. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate2TensorsKernel - * - * @param[in] input1 First tensor info. Data types supported: All. - * @param[in] input2 Second tensor info. Data types supported: same as @p input1 - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h deleted file mode 100644 index 6b0e8ee21d..0000000000 --- a/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H -#define ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -/** Interface for the width concatenate kernel of 4 tensors. - * All input tensors will be concatenated into the output tensor. - */ -class CLWidthConcatenate4TensorsKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLWidthConcatenate4TensorsKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenate4TensorsKernel(const CLWidthConcatenate4TensorsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenate4TensorsKernel &operator=(const CLWidthConcatenate4TensorsKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWidthConcatenate4TensorsKernel(CLWidthConcatenate4TensorsKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWidthConcatenate4TensorsKernel &operator=(CLWidthConcatenate4TensorsKernel &&) = default; - /** Default destructor */ - ~CLWidthConcatenate4TensorsKernel() = default; - /** Initialise the kernel's input1s and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 First input tensor. Data types supported: All. - * @param[in] input2 Second input tensor. Data types supported: same as @p input1 - * @param[in] input3 Third input tensor. Data types supported: same as @p input1 - * @param[in] input4 Fourth input tensor. Data types supported: same as @p input1 - * @param[out] output Output tensor. Data types supported: Same as @p input1. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *input3, ITensorInfo *input4, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate4TensorsKernel - * - * @param[in] input1 First tensor info. Data types supported: All. - * @param[in] input2 Second tensor info. Data types supported: same as @p input1 - * @param[in] input3 Third tensor info. Data types supported: same as @p input1 - * @param[in] input4 Fourth tensor info. Data types supported: same as @p input1 - * @param[in] output Output tensor info. Data types supported: Same as @p input1. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h deleted file mode 100644 index 16cf167b25..0000000000 --- a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H -#define ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -/** Interface for the width concatenate kernel. - * The input tensor will be concatenated into the output tensor. - */ -class CLWidthConcatenateLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLWidthConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenateLayerKernel(const CLWidthConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenateLayerKernel &operator=(const CLWidthConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWidthConcatenateLayerKernel(CLWidthConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWidthConcatenateLayerKernel &operator=(CLWidthConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~CLWidthConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[in] width_offset The offset on the X axis. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. 
- * - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int width_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] width_offset The offset on the X axis. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h deleted file mode 100644 index b689be820f..0000000000 --- a/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H -#define ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the Winograd filter transform kernel. */ -class CLWinogradFilterTransformKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLWinogradFilterTransformKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWinogradFilterTransformKernel(const CLWinogradFilterTransformKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWinogradFilterTransformKernel &operator=(const CLWinogradFilterTransformKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWinogradFilterTransformKernel(CLWinogradFilterTransformKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWinogradFilterTransformKernel &operator=(CLWinogradFilterTransformKernel &&) = default; - /** Default destructor */ - ~CLWinogradFilterTransformKernel() = default; - /** Set the input and output tensor. 
- * - * @note Winograd filter transform supports the following configurations for NCWH data layout - * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), - * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * @note Winograd filter transform supports the following configurations for NHWC data layout - * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * Strides: only unit strides - * - * @param[in] input Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32. - * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input - * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo - */ - void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info); - /** Set the input and output tensor. - * - * @note Winograd filter transform supports the following configurations for NCWH data layout - * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), - * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * @note Winograd filter transform supports the following configurations for NHWC data layout - * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * Strides: only unit strides - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32. - * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input - * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradFilterTransformKernel - * - * @note Winograd filter transform supports the following configurations for NCWH data layout - * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), - * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * @note Winograd filter transform supports the following configurations for NHWC data layout - * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * Strides: only unit strides - * - * @param[in] input Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32. - * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. 
Data types supported: Same as @p input - * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h deleted file mode 100644 index 4f198f034a..0000000000 --- a/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H -#define ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform Winograd input transform.*/ -class CLWinogradInputTransformKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLWinogradInputTransformKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWinogradInputTransformKernel(const CLWinogradInputTransformKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWinogradInputTransformKernel &operator=(const CLWinogradInputTransformKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWinogradInputTransformKernel(CLWinogradInputTransformKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWinogradInputTransformKernel &operator=(CLWinogradInputTransformKernel &&) = default; - /** Set the input and output of the kernel. 
- * - * @note Winograd input transform supports the following configurations for NCWH data layout - * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), - * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * @note Winograd input transform supports the following configurations for NHWC data layout - * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * Strides: only unit strides - * - * @param[in] input The input tensor to transform. Data types supported: F16/F32 - * @param[in] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input - * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo. - */ - void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info); - /** Set the input and output of the kernel. - * - * @note Winograd input transform supports the following configurations for NCWH data layout - * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), - * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * @note Winograd input transform supports the following configurations for NHWC data layout - * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * Strides: only unit strides - * - * @param[in] compile_context The compile context to be used. - * @param[in] input The input tensor to transform. Data types supported: F16/F32 - * @param[in] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input - * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradInputTransformKernel - * - * @note Winograd input transform supports the following configurations for NCWH data layout - * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), - * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * @note Winograd input transform supports the following configurations for NHWC data layout - * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * Strides: only unit strides - * - * @param[in] input The input tensor to transform. Data types supported: F16/F32 - * @param[in] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input - * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - using WinogradKey = std::pair, std::pair>; - - BorderSize _border_size; - const ICLTensor *_input; - ICLTensor *_output; - DataLayout _data_layout; - int _num_tiles_x; - int _num_tiles_y; - unsigned int _step_z; -}; -} // arm_compute -#endif /*ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h deleted file mode 100644 index f7cbd05020..0000000000 --- a/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H -#define ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the Winograd output transform kernel. */ -class CLWinogradOutputTransformKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLWinogradOutputTransformKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWinogradOutputTransformKernel(const CLWinogradOutputTransformKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWinogradOutputTransformKernel &operator=(const CLWinogradOutputTransformKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWinogradOutputTransformKernel(CLWinogradOutputTransformKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWinogradOutputTransformKernel &operator=(CLWinogradOutputTransformKernel &&) = default; - /** Default destructor */ - ~CLWinogradOutputTransformKernel() = default; - /** Set the input and output tensor. 
- * - * @note Winograd output transform supports the following configurations for NCWH data layout - * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), - * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * @note Winograd output transform supports the following configurations for NHWC data layout - * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * Strides: only unit strides - * - * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32. - * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input - * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input - * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Set the input and output tensor. - * - * @note Winograd output transform supports the following configurations for NCWH data layout - * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), - * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * @note Winograd output transform supports the following configurations for NHWC data layout - * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * Strides: only unit strides - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32. - * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input - * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input - * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); - - /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradOutputTransformKernel - * - * @note Winograd output transform supports the following configurations for NCWH data layout - * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), - * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * @note Winograd output transform supports the following configurations for NHWC data layout - * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * Strides: only unit strides - * - * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32. 
- * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input - * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input - * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo - * @param[in] act_info (Optional) Activation layer information in case of a fused activation @ref ActivationLayerInfo. Only RELU, BOUNDED_RELU, LU_BOUNDED_RELU, LEAKY_RELU and SOFT_RELU supported. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - using WinogradKey = std::pair, std::pair>; - - const ICLTensor *_input; - const ICLTensor *_bias; - ICLTensor *_output; - bool _is_nhwc; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLYOLOLayerKernel.h b/arm_compute/core/CL/kernels/CLYOLOLayerKernel.h deleted file mode 100644 index 52b069868e..0000000000 --- a/arm_compute/core/CL/kernels/CLYOLOLayerKernel.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLYOLOLAYERKERNEL_H -#define ARM_COMPUTE_CLYOLOLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the YOLO layer kernel that performs partial activation. 
- * For each box, activate only: - * - x and y position (channel 0 and 1 of each box) - * - objectiveness (channel 4 of each box) - * - classes (channel 5 to (classes - 5) of each box) - */ -class CLYOLOLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLYOLOLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLYOLOLayerKernel(const CLYOLOLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLYOLOLayerKernel &operator=(const CLYOLOLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLYOLOLayerKernel(CLYOLOLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLYOLOLayerKernel &operator=(CLYOLOLayerKernel &&) = default; - /** Default destructor */ - ~CLYOLOLayerKernel() = default; - /** Set the input and output tensor. - * - * @note If the output tensor is a nullptr, the activation function will be performed in-place - * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result - * of the activation function. Data types supported: F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] act_info Activation layer information. - * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) - */ - void configure(ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes); - /** Set the input and output tensor. - * - * @note If the output tensor is a nullptr, the activation function will be performed in-place - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result - * of the activation function. Data types supported: F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] act_info Activation layer information. - * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) - */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes); - /** Static function to check if given info will lead to a valid configuration of @ref CLYOLOLayerKernel - * - * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result - * of the activation function. Data types supported: F16/F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input - * @param[in] act_info Activation layer information. 
- * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_input; - ICLTensor *_output; - bool _run_in_place; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLYOLOLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h deleted file mode 100644 index d182e386b8..0000000000 --- a/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H -#define ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor. - */ -class ICLDepthwiseConvolutionLayer3x3Kernel : public ICLKernel -{ -public: - /** Default constructor */ - ICLDepthwiseConvolutionLayer3x3Kernel() - : _border_size(0), _input(), _output(), _weights(), _biases(), _conv_stride_y(1), _output_multipliers(), _output_shifts(), _is_quantized(false) - { - } - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICLDepthwiseConvolutionLayer3x3Kernel(const ICLDepthwiseConvolutionLayer3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICLDepthwiseConvolutionLayer3x3Kernel &operator=(const ICLDepthwiseConvolutionLayer3x3Kernel &) = delete; - /** Default Move Constructor. */ - ICLDepthwiseConvolutionLayer3x3Kernel(ICLDepthwiseConvolutionLayer3x3Kernel &&) = default; - /** Default move assignment operator */ - ICLDepthwiseConvolutionLayer3x3Kernel &operator=(ICLDepthwiseConvolutionLayer3x3Kernel &&) = default; - /** Initialize the function's source, destination, conv and border_size. - * - * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. 
- * Data type supported: Same as @p input, QASYMM8/QSYMM8_PER_CHANNEL when input is QASYMM8. - * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for QASYMM8. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - */ - virtual void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), - const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) = 0; - /** Initialize the function's source, destination, conv and border_size. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. - * Data type supported: Same as @p input, QASYMM8/QSYMM8_PER_CHANNEL when input is QASYMM8. - * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for QASYMM8. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). 
Supported data types: S32 - */ - virtual void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), - const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) = 0; - -protected: - BorderSize _border_size; - const ICLTensor *_input; - ICLTensor *_output; - const ICLTensor *_weights; - const ICLTensor *_biases; - unsigned int _conv_stride_y; - const ICLTensor *_output_multipliers; - const ICLTensor *_output_shifts; - bool _is_quantized; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H */ diff --git a/arm_compute/runtime/CL/ICLOperator.h b/arm_compute/runtime/CL/ICLOperator.h index 526b7e93e9..38bcaf32f2 100644 --- a/arm_compute/runtime/CL/ICLOperator.h +++ b/arm_compute/runtime/CL/ICLOperator.h @@ -24,7 +24,8 @@ #ifndef ARM_COMPUTE_ICLOPERATOR_H #define ARM_COMPUTE_ICLOPERATOR_H -#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/Types.h" + #include "arm_compute/runtime/IOperator.h" #include "arm_compute/runtime/IRuntimeContext.h" #include "arm_compute/runtime/Types.h" @@ -33,6 +34,7 @@ namespace arm_compute { +class ICLKernel; namespace experimental { /** Basic interface for functions which have a single async CL kernel */ diff --git a/arm_compute/runtime/CL/ICLSimpleFunction.h b/arm_compute/runtime/CL/ICLSimpleFunction.h index 4b1d5b1485..310bf770c4 100644 --- a/arm_compute/runtime/CL/ICLSimpleFunction.h +++ b/arm_compute/runtime/CL/ICLSimpleFunction.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_ICLSIMPLEFUNCTION_H #define ARM_COMPUTE_ICLSIMPLEFUNCTION_H -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/runtime/IFunction.h" #include @@ -34,6 +32,8 @@ namespace arm_compute { // Forward declarations class CLRuntimeContext; +class CLFillBorderKernel; +class ICLKernel; /** Basic interface for functions which have a single OpenCL kernel */ class ICLSimpleFunction : public IFunction @@ -53,14 +53,16 @@ public: ICLSimpleFunction &operator=(const ICLSimpleFunction &) = delete; /** Default move assignment operator */ ICLSimpleFunction &operator=(ICLSimpleFunction &&) = default; + /** Default destructor */ + ~ICLSimpleFunction(); // Inherited methods overridden: void run() override final; protected: - std::unique_ptr _kernel; /**< Kernel to run */ - CLFillBorderKernel _border_handler; /**< Kernel to handle borders */ - CLRuntimeContext *_ctx; /**< Context to use */ + std::unique_ptr _kernel; /**< Kernel to run */ + std::unique_ptr _border_handler; /**< Kernel to handle borders */ + CLRuntimeContext *_ctx; /**< Context to use */ }; } // namespace arm_compute #endif /*ARM_COMPUTE_ICLSIMPLEFUNCTION_H */ diff --git a/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h b/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h index b0f1948beb..f2831e2a99 100644 --- a/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h +++ b/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h @@ -28,6 +28,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Basic function to run @ref CLAbsoluteDifferenceKernel diff --git a/arm_compute/runtime/CL/functions/CLAccumulate.h b/arm_compute/runtime/CL/functions/CLAccumulate.h index 9dbf13b873..20d3476d2e 100644 --- a/arm_compute/runtime/CL/functions/CLAccumulate.h +++ b/arm_compute/runtime/CL/functions/CLAccumulate.h @@ -30,6 +30,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Basic function to run @ref CLAccumulateKernel */ diff --git a/arm_compute/runtime/CL/functions/CLActivationLayer.h b/arm_compute/runtime/CL/functions/CLActivationLayer.h index 632487c78d..dc2cb62b71 100644 --- a/arm_compute/runtime/CL/functions/CLActivationLayer.h +++ b/arm_compute/runtime/CL/functions/CLActivationLayer.h @@ -31,7 +31,9 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to run @ref CLActivationLayerKernel * diff --git a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h index dc0c37e860..c254284cd7 100644 --- a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h +++ b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_CLARGMINMAXLAYER_H #define ARM_COMPUTE_CLARGMINMAXLAYER_H -#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLReshapeLayer.h" @@ -36,6 +35,7 @@ namespace arm_compute { class ITensorInfo; class ICLTensor; +class CLArgMinMaxLayerKernel; /** Function to calculate the index of the minimum or maximum values in a * tensor based on an axis. @@ -53,6 +53,16 @@ public: * @param[in] memory_manager (Optional) Memory manager. 
*/ CLArgMinMaxLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied */ + CLArgMinMaxLayer(const CLArgMinMaxLayer &) = delete; + /** Prevent instances of this class from being copied */ + CLArgMinMaxLayer &operator=(const CLArgMinMaxLayer &) = delete; + /** Prevent instances of this class to be moved */ + CLArgMinMaxLayer(CLArgMinMaxLayer &&) = delete; + /** Prevent instances of this class to be moved */ + CLArgMinMaxLayer &operator=(CLArgMinMaxLayer &&) = delete; + /** Default destructor */ + ~CLArgMinMaxLayer(); /** Set the input and output tensors. * * @param[in] input Input source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. @@ -85,13 +95,13 @@ public: void run() override; private: - MemoryGroup _memory_group; - std::vector _results_vector; - CLTensor _not_reshaped_output; - std::vector _reduction_kernels_vector; - CLReshapeLayer _reshape; - unsigned int _num_of_stages; - unsigned int _reduction_axis; + MemoryGroup _memory_group; + std::vector _results_vector; + CLTensor _not_reshaped_output; + std::vector> _reduction_kernels_vector; + CLReshapeLayer _reshape; + unsigned int _num_of_stages; + unsigned int _reduction_axis; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLARGMINMAXLAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h index c22991da7c..c8acf9fc6b 100644 --- a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h @@ -26,12 +26,16 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h" #include "arm_compute/core/Types.h" +#include + namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; +class CLBatchNormalizationLayerKernel; /** Basic function to run @ref CLNormalizationLayerKernel and simulate a batch normalization layer. * @@ -44,6 +48,16 @@ class CLBatchNormalizationLayer : public IFunction public: /** Default constructor */ CLBatchNormalizationLayer(); + /** Prevent instances of this class from being copied */ + CLBatchNormalizationLayer(const CLBatchNormalizationLayer &) = delete; + /** Prevent instances of this class from being copied */ + CLBatchNormalizationLayer &operator=(const CLBatchNormalizationLayer &) = delete; + /** Prevent instances of this class to be moved */ + CLBatchNormalizationLayer(CLBatchNormalizationLayer &&) = delete; + /** Prevent instances of this class to be moved */ + CLBatchNormalizationLayer &operator=(CLBatchNormalizationLayer &&) = delete; + /** Default destructor */ + ~CLBatchNormalizationLayer(); /** Set the input and output tensors. 
* * @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place @@ -104,7 +118,7 @@ public: void run() override; private: - CLBatchNormalizationLayerKernel _norm_kernel; /**< BatchNormalization layer kernel to run */ + std::unique_ptr _norm_kernel; /**< BatchNormalization layer kernel to run */ }; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_CLBATCHNORMALIZATIONLAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h b/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h index ba57921cc2..bdb58531d0 100644 --- a/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h +++ b/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h @@ -26,11 +26,15 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h" #include "arm_compute/core/Types.h" +#include + namespace arm_compute { +class CLCompileContext; +class ITensorInfo; +class CLBatchToSpaceLayerKernel; class ICLTensor; /** Basic function to run @ref CLBatchToSpaceLayerKernel. */ @@ -39,6 +43,16 @@ class CLBatchToSpaceLayer : public IFunction public: /** Default constructor */ CLBatchToSpaceLayer(); + /** Prevent instances of this class from being copied */ + CLBatchToSpaceLayer(const CLBatchToSpaceLayer &) = delete; + /** Prevent instances of this class from being copied */ + CLBatchToSpaceLayer &operator=(const CLBatchToSpaceLayer &) = delete; + /** Prevent instances of this class to be moved */ + CLBatchToSpaceLayer(CLBatchToSpaceLayer &&) = delete; + /** Prevent instances of this class to be moved */ + CLBatchToSpaceLayer &operator=(CLBatchToSpaceLayer &&) = delete; + /** Default destructor */ + ~CLBatchToSpaceLayer(); /** Set the input and output tensors. * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. @@ -95,7 +109,7 @@ public: void run() override; private: - CLBatchToSpaceLayerKernel _batch_to_space_kernel; /**< CLBatchToSpaceLayerKernel to run */ + std::unique_ptr _batch_to_space_kernel; /**< CLBatchToSpaceLayerKernel to run */ }; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_CLBATCHTOSPACELAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLBitwiseAnd.h b/arm_compute/runtime/CL/functions/CLBitwiseAnd.h index 3c28938807..bf5993f4b0 100644 --- a/arm_compute/runtime/CL/functions/CLBitwiseAnd.h +++ b/arm_compute/runtime/CL/functions/CLBitwiseAnd.h @@ -28,6 +28,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Basic function to run @ref CLBitwiseAndKernel. diff --git a/arm_compute/runtime/CL/functions/CLBitwiseNot.h b/arm_compute/runtime/CL/functions/CLBitwiseNot.h index 4c21d5647f..1d8531a176 100644 --- a/arm_compute/runtime/CL/functions/CLBitwiseNot.h +++ b/arm_compute/runtime/CL/functions/CLBitwiseNot.h @@ -28,6 +28,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Basic function to run @ref CLBitwiseNotKernel. diff --git a/arm_compute/runtime/CL/functions/CLBitwiseOr.h b/arm_compute/runtime/CL/functions/CLBitwiseOr.h index 8a481737e3..7876cbf196 100644 --- a/arm_compute/runtime/CL/functions/CLBitwiseOr.h +++ b/arm_compute/runtime/CL/functions/CLBitwiseOr.h @@ -28,6 +28,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Basic function to run @ref CLBitwiseOrKernel. 
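Note on the recurring change in the runtime headers above (ICLSimpleFunction, CLArgMinMaxLayer, CLBatchNormalizationLayer, CLBatchToSpaceLayer, and most of the functions that follow): the concrete kernel member becomes a std::unique_ptr to a forward-declared kernel type, and the destructor is declared in the header but defined in the source file. The snippet below is a minimal, self-contained sketch of that pattern only; MyKernel and MyFunction are hypothetical names, not library classes.

// Sketch of the forward-declaration + unique_ptr pattern this patch applies,
// compressed into one translation unit so it compiles standalone (C++14).
#include <iostream>
#include <memory>

class MyKernel; // forward declaration: the public "header" part never needs the definition

class MyFunction
{
public:
    MyFunction();
    ~MyFunction(); // declared here, defined below where MyKernel is a complete type
    MyFunction(const MyFunction &) = delete;            // unique_ptr member is not copyable
    MyFunction &operator=(const MyFunction &) = delete;
    void run();

private:
    std::unique_ptr<MyKernel> _kernel; // an incomplete type is fine for the member declaration
};

// "Source file" part: only here is the full kernel definition needed.
class MyKernel
{
public:
    void run() { std::cout << "kernel enqueued\n"; }
};

MyFunction::MyFunction() : _kernel(std::make_unique<MyKernel>()) {}
MyFunction::~MyFunction() = default; // unique_ptr's deleter is instantiated with a complete type
void MyFunction::run() { _kernel->run(); }

int main()
{
    MyFunction f;
    f.run();
    return 0;
}

Defining the destructor (and copy/move operations) out of line is what makes this legal with an incomplete member type, which in turn is what allows the kernel headers to move out of the public include tree.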
diff --git a/arm_compute/runtime/CL/functions/CLBitwiseXor.h b/arm_compute/runtime/CL/functions/CLBitwiseXor.h index 6928e59d38..4f054062cd 100644 --- a/arm_compute/runtime/CL/functions/CLBitwiseXor.h +++ b/arm_compute/runtime/CL/functions/CLBitwiseXor.h @@ -28,6 +28,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Basic function to run @ref CLBitwiseXorKernel. diff --git a/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h b/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h index 5e4e89071b..d6409106da 100644 --- a/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h +++ b/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h @@ -24,12 +24,16 @@ #ifndef ARM_COMPUTE_CLBOUNDINGBOXTRANSOFORM_H #define ARM_COMPUTE_CLBOUNDINGBOXTRANSOFORM_H -#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h" +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" namespace arm_compute { +class CLCompileContext; +class CLBoundingBoxTransformKernel; +class BoundingBoxTransformInfo; class ICLTensor; +class ITensorInfo; /** Basic function to run @ref CLBoundingBoxTransformKernel. * diff --git a/arm_compute/runtime/CL/functions/CLBox3x3.h b/arm_compute/runtime/CL/functions/CLBox3x3.h index 2d2aa4705c..cff780614c 100644 --- a/arm_compute/runtime/CL/functions/CLBox3x3.h +++ b/arm_compute/runtime/CL/functions/CLBox3x3.h @@ -31,6 +31,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Basic function to execute box filter 3x3. This function calls the following OpenCL kernels: diff --git a/arm_compute/runtime/CL/functions/CLCannyEdge.h b/arm_compute/runtime/CL/functions/CLCannyEdge.h index f9d9f8f66a..9e41c31728 100644 --- a/arm_compute/runtime/CL/functions/CLCannyEdge.h +++ b/arm_compute/runtime/CL/functions/CLCannyEdge.h @@ -26,8 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h" -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -36,6 +34,11 @@ namespace arm_compute { +class CLCompileContext; +class CLFillBorderKernel; +class CLGradientKernel; +class CLEdgeNonMaxSuppressionKernel; +class CLEdgeTraceKernel; class ICLTensor; /** Basic function to execute canny edge on OpenCL. This function calls the following OpenCL kernels and functions: @@ -56,6 +59,8 @@ public: CLCannyEdge(const CLCannyEdge &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ CLCannyEdge &operator=(const CLCannyEdge &) = delete; + /** Default destructor */ + ~CLCannyEdge(); /** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode. * * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for border_mode != UNDEFINED) @@ -88,20 +93,20 @@ public: virtual void run() override; private: - MemoryGroup _memory_group; /**< Function's memory group */ - std::unique_ptr _sobel; /**< Pointer to Sobel kernel. */ - CLGradientKernel _gradient; /**< Gradient kernel. */ - CLFillBorderKernel _border_mag_gradient; /**< Fill border on magnitude tensor kernel */ - CLEdgeNonMaxSuppressionKernel _non_max_suppr; /**< Non-Maxima suppression kernel. */ - CLEdgeTraceKernel _edge_trace; /**< Edge tracing kernel. */ - CLImage _gx; /**< Source tensor - Gx component. */ - CLImage _gy; /**< Source tensor - Gy component. 
*/ - CLImage _mag; /**< Source tensor - Magnitude. */ - CLImage _phase; /**< Source tensor - Phase. */ - CLImage _nonmax; /**< Source tensor - Non-Maxima suppressed. */ - CLImage _visited, _recorded, _l1_list_counter, _l1_stack; /**< Temporary tensors */ - ICLTensor *_output; /**< Output tensor provided by the user. */ + MemoryGroup _memory_group; /**< Function's memory group */ + std::unique_ptr _sobel; /**< Pointer to Sobel kernel. */ + std::unique_ptr _gradient; /**< Gradient kernel. */ + std::unique_ptr _border_mag_gradient; /**< Fill border on magnitude tensor kernel */ + std::unique_ptr _non_max_suppr; /**< Non-Maxima suppression kernel. */ + std::unique_ptr _edge_trace; /**< Edge tracing kernel. */ + CLImage _gx; /**< Source tensor - Gx component. */ + CLImage _gy; /**< Source tensor - Gy component. */ + CLImage _mag; /**< Source tensor - Magnitude. */ + CLImage _phase; /**< Source tensor - Phase. */ + CLImage _nonmax; /**< Source tensor - Non-Maxima suppressed. */ + CLImage _visited, _recorded, _l1_list_counter, _l1_stack; /**< Temporary tensors */ + ICLTensor *_output; /**< Output tensor provided by the user. */ }; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_CLCANNYEDGE_H */ diff --git a/arm_compute/runtime/CL/functions/CLCast.h b/arm_compute/runtime/CL/functions/CLCast.h index 592368d135..bd333d4e72 100644 --- a/arm_compute/runtime/CL/functions/CLCast.h +++ b/arm_compute/runtime/CL/functions/CLCast.h @@ -31,7 +31,9 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to run @ref CLDepthConvertLayerKernel. */ class CLCast : public ICLSimpleFunction diff --git a/arm_compute/runtime/CL/functions/CLChannelCombine.h b/arm_compute/runtime/CL/functions/CLChannelCombine.h index 4e3d10cc10..5927662fc2 100644 --- a/arm_compute/runtime/CL/functions/CLChannelCombine.h +++ b/arm_compute/runtime/CL/functions/CLChannelCombine.h @@ -28,6 +28,7 @@ namespace arm_compute { +class CLCompileContext; class ICLMultiImage; class ICLTensor; using ICLImage = ICLTensor; diff --git a/arm_compute/runtime/CL/functions/CLChannelExtract.h b/arm_compute/runtime/CL/functions/CLChannelExtract.h index cf042b4519..9ce9bcdd8a 100644 --- a/arm_compute/runtime/CL/functions/CLChannelExtract.h +++ b/arm_compute/runtime/CL/functions/CLChannelExtract.h @@ -29,6 +29,7 @@ namespace arm_compute { +class CLCompileContext; class ICLMultiImage; class ICLTensor; using ICLImage = ICLTensor; diff --git a/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h b/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h index e0bb3d01c9..54cf59f59a 100644 --- a/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h +++ b/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h @@ -24,11 +24,14 @@ #ifndef ARM_COMPUTE_CLCHANNELSHUFFLELAYER_H #define ARM_COMPUTE_CLCHANNELSHUFFLELAYER_H +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to run @ref CLChannelShuffleLayerKernel * diff --git a/arm_compute/runtime/CL/functions/CLColorConvert.h b/arm_compute/runtime/CL/functions/CLColorConvert.h index e4017c2686..47bcabfb63 100644 --- a/arm_compute/runtime/CL/functions/CLColorConvert.h +++ b/arm_compute/runtime/CL/functions/CLColorConvert.h @@ -28,6 +28,7 @@ namespace arm_compute { +class CLCompileContext; class ICLMultiImage; class ICLTensor; using ICLImage = ICLTensor; diff --git a/arm_compute/runtime/CL/functions/CLComparison.h 
b/arm_compute/runtime/CL/functions/CLComparison.h index c6d61e45f2..8cc3e96ec5 100644 --- a/arm_compute/runtime/CL/functions/CLComparison.h +++ b/arm_compute/runtime/CL/functions/CLComparison.h @@ -30,7 +30,9 @@ namespace arm_compute { // Forward declarations +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to run @ref CLComparisonKernel */ class CLComparison : public ICLSimpleFunction diff --git a/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h b/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h index a2f1a4eb66..d6a2ab423d 100644 --- a/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h +++ b/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h @@ -24,12 +24,15 @@ #ifndef ARM_COMPUTE_CLCOMPUTEALLANCHORS_H #define ARM_COMPUTE_CLCOMPUTEALLANCHORS_H -#include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h" +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; +class ComputeAnchorsInfo; /** Basic function to run @ref CLComputeAllAnchorsKernel. * diff --git a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h index f535c8ea97..5e7003a112 100644 --- a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h +++ b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h @@ -27,7 +27,6 @@ #include "arm_compute/runtime/CL/ICLOperator.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/Types.h" #include @@ -36,7 +35,9 @@ namespace arm_compute { // Forward declarations +class CLCompileContext; class ICLTensor; +class ICLKernel; class ITensorInfo; class Status; diff --git a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h index 9298be2e53..75a3d3213e 100644 --- a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h +++ b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h @@ -24,14 +24,17 @@ #ifndef ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTS_H #define ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTS_H -#include "arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" #include "arm_compute/runtime/ITransformWeights.h" namespace arm_compute { +class CLCompileContext; +class CLConvertFullyConnectedWeightsKernel; class ICLTensor; +class ITensorInfo; /** Basic function to run @ref CLConvertFullyConnectedWeightsKernel. 
*/ class CLConvertFullyConnectedWeights : public ICLSimpleFunction diff --git a/arm_compute/runtime/CL/functions/CLConvolution.h b/arm_compute/runtime/CL/functions/CLConvolution.h index c06ad0d969..4a1631a702 100644 --- a/arm_compute/runtime/CL/functions/CLConvolution.h +++ b/arm_compute/runtime/CL/functions/CLConvolution.h @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_CLCONVOLUTION_H #define ARM_COMPUTE_CLCONVOLUTION_H -#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h" -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" @@ -38,6 +36,13 @@ namespace arm_compute { +template +class CLConvolutionKernel; +template +class CLSeparableConvolutionHorKernel; +template +class CLSeparableConvolutionVertKernel; +class CLFillBorderKernel; class ICLTensor; /** Basic function to execute convolution of size 3x3. This function calls the following OpenCL kernels: @@ -85,6 +90,16 @@ class CLConvolutionSquare : public IFunction public: /** Default constructor */ CLConvolutionSquare(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLConvolutionSquare(const CLConvolutionSquare &) = delete; + /** Default move constructor */ + CLConvolutionSquare(CLConvolutionSquare &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLConvolutionSquare &operator=(const CLConvolutionSquare &) = delete; + /** Default move assignment operator */ + CLConvolutionSquare &operator=(CLConvolutionSquare &&) = default; + /** Default destructor */ + ~CLConvolutionSquare(); /** Initialize the function's source, destination, conv and border_mode. * * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) @@ -111,13 +126,13 @@ public: void run() override; private: - MemoryGroup _memory_group; /**< Function's memory group */ - CLTensor _tmp; /**< temporary buffer for output of horizontal pass */ - bool _is_separable; /**< true if the convolution can be separated */ - CLSeparableConvolutionHorKernel _kernel_hor; /**< kernel for horizontal pass of separated convolution */ - CLSeparableConvolutionVertKernel _kernel_vert; /**< kernel for vertical pass of separated convolution */ - CLConvolutionKernel _kernel; /**< kernel for non-separated convolution **/ - CLFillBorderKernel _border_handler; /**< kernel for border handling */ + MemoryGroup _memory_group; /**< Function's memory group */ + CLTensor _tmp; /**< temporary buffer for output of horizontal pass */ + bool _is_separable; /**< true if the convolution can be separated */ + std::unique_ptr> _kernel_hor; /**< kernel for horizontal pass of separated convolution */ + std::unique_ptr> _kernel_vert; /**< kernel for vertical pass of separated convolution */ + std::unique_ptr> _kernel; /**< kernel for non-separated convolution **/ + std::unique_ptr _border_handler; /**< kernel for border handling */ }; /** Basic function to run 5x5 convolution. 
*/ diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h index ac36523682..d1de721193 100644 --- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h @@ -73,6 +73,16 @@ class CLConvolutionLayer : public IFunction public: /** Default constructor */ CLConvolutionLayer(std::shared_ptr memory_manager = nullptr); + /** Default Destructor */ + ~CLConvolutionLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLConvolutionLayer(const CLConvolutionLayer &) = delete; + /** Default move constructor */ + CLConvolutionLayer(CLConvolutionLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLConvolutionLayer &operator=(const CLConvolutionLayer &) = delete; + /** Default move assignment operator */ + CLConvolutionLayer &operator=(CLConvolutionLayer &&) = default; /** Set the input and output tensors. * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], diff --git a/arm_compute/runtime/CL/functions/CLCopy.h b/arm_compute/runtime/CL/functions/CLCopy.h index c20d75eea8..f1a091df84 100644 --- a/arm_compute/runtime/CL/functions/CLCopy.h +++ b/arm_compute/runtime/CL/functions/CLCopy.h @@ -31,7 +31,9 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; class CLCopy : public ICLSimpleFunction { diff --git a/arm_compute/runtime/CL/functions/CLCropResize.h b/arm_compute/runtime/CL/functions/CLCropResize.h index e940928b90..e781cfe61f 100644 --- a/arm_compute/runtime/CL/functions/CLCropResize.h +++ b/arm_compute/runtime/CL/functions/CLCropResize.h @@ -25,9 +25,7 @@ #define ARM_COMPUTE_CL_CROP_RESIZE_H #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLCopyKernel.h" -#include "arm_compute/core/CL/kernels/CLCropKernel.h" -#include "arm_compute/core/CL/kernels/CLMemsetKernel.h" + #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLScale.h" @@ -37,7 +35,11 @@ namespace arm_compute { // Forward Declarations +class CLCompileContext; +class CLCopyKernel; +class CLCropKernel; class ITensor; +class ITensorInfo; /** Function to perform cropping and resizing */ class CLCropResize : public IFunction @@ -54,7 +56,7 @@ public: /** Allow instances of this class to be moved */ CLCropResize &operator=(CLCropResize &&) = default; /** Default destructor */ - virtual ~CLCropResize() = default; + ~CLCropResize(); /** Configure kernel * diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h index 19a44f7b93..3ebc858d32 100644 --- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h +++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h @@ -24,17 +24,20 @@ #ifndef ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLE_H #define ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLE_H -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h" -#include "arm_compute/core/CL/kernels/CLMemsetKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IFunction.h" + +#include namespace arm_compute { // Forward declarations +class CLDeconvolutionLayerUpsampleKernel; +class CLCompileContext; +class CLMemsetKernel; class ICLTensor; +class 
ITensorInfo; /** Basic function to execute deconvolution upsample on OpenCL. This function calls the following OpenCL kernels and functions: * @@ -55,7 +58,7 @@ public: /** Allow instances of this class to be moved */ CLDeconvolutionLayerUpsample &operator=(CLDeconvolutionLayerUpsample &&) = default; /** Default destructor */ - virtual ~CLDeconvolutionLayerUpsample() = default; + ~CLDeconvolutionLayerUpsample(); /** Initialize the function's source, destination, interpolation type and border_mode. * @@ -86,9 +89,9 @@ public: void run() override; private: - CLDeconvolutionLayerUpsampleKernel _upsample; - CLMemsetKernel _memset; - ICLTensor *_output; + std::unique_ptr _upsample; + std::unique_ptr _memset; + ICLTensor *_output; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLE_H */ diff --git a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h index d125584c97..b0f297aec5 100644 --- a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h +++ b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h @@ -31,7 +31,9 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to run @ref CLDepthConvertLayerKernel. */ class CLDepthConvertLayer : public ICLSimpleFunction diff --git a/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h b/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h index 5e197cb9b8..a0aa288dbf 100644 --- a/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h +++ b/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h @@ -29,7 +29,9 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to run @ref CLDepthToSpaceLayerKernel. */ class CLDepthToSpaceLayer : public ICLSimpleFunction diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h index 558c4540fa..8e594bc09f 100644 --- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h @@ -24,12 +24,6 @@ #ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H #define ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h" -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" -#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLPermute.h" @@ -38,6 +32,11 @@ namespace arm_compute { +class CLCompileContext; +class CLFillBorderKernel; +class CLDepthwiseConvolutionLayerNativeKernel; +class CLDepthwiseConvolutionLayerReshapeWeightsKernel; +class ICLDepthwiseConvolutionLayer3x3Kernel; class ICLTensor; /** Function to execute a depthwise convolution @@ -55,6 +54,8 @@ public: CLDepthwiseConvolutionLayer &operator=(const CLDepthwiseConvolutionLayer &) = delete; /** Default move assignment operator */ CLDepthwiseConvolutionLayer &operator=(CLDepthwiseConvolutionLayer &&) = default; + /** Default destructor */ + ~CLDepthwiseConvolutionLayer(); /** Initialize the function's source, destination, weights and convolution 
information. * * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP16/FP32. Data layout supported: NHWC, NCHW @@ -211,25 +212,25 @@ private: }; private: - MemoryGroup _memory_group; - std::unique_ptr _kernel; - CLFillBorderKernel _border_handler; - CLPermute _permute_input_to_nchw; - CLPermute _permute_weights_to_nchw; - CLPermute _permute_output_to_nhwc; - CLDepthwiseConvolutionLayerReshapeWeightsKernel _reshape_weights; - CLTensor _permuted_input; - CLTensor _permuted_weights; - CLTensor _permuted_output; - CLTensor _output_multipliers; - CLTensor _output_shifts; - const ITensor *_original_weights; - const ITensor *_input; - const ITensor *_output; - bool _needs_permute; - bool _needs_weights_reshape; - bool _is_prepared; - bool _is_quantized; + MemoryGroup _memory_group; + std::unique_ptr _kernel; + std::unique_ptr _border_handler; + CLPermute _permute_input_to_nchw; + CLPermute _permute_weights_to_nchw; + CLPermute _permute_output_to_nhwc; + std::unique_ptr _reshape_weights; + CLTensor _permuted_input; + CLTensor _permuted_weights; + CLTensor _permuted_output; + CLTensor _output_multipliers; + CLTensor _output_shifts; + const ITensor *_original_weights; + const ITensor *_input; + const ITensor *_output; + bool _needs_permute; + bool _needs_weights_reshape; + bool _is_prepared; + bool _is_quantized; }; /** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels: @@ -313,10 +314,10 @@ private: private: MemoryGroup _memory_group; - CLDepthwiseConvolutionLayerNativeKernel _dwc_native_kernel; - CLPermute _permute_input_to_nhwc; - CLPermute _permute_weights_to_nhwc; - CLPermute _permute_output_to_nchw; + std::unique_ptr _dwc_native_kernel; + CLPermute _permute_input_to_nhwc; + CLPermute _permute_weights_to_nhwc; + CLPermute _permute_output_to_nchw; CLTensor _permuted_input; CLTensor _permuted_weights; diff --git a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h index 88ed915421..b2cf3356f4 100644 --- a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h @@ -31,7 +31,9 @@ namespace arm_compute { // Forward declarations +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to run @ref CLDequantizationLayerKernel that dequantizes an input tensor */ class CLDequantizationLayer : public ICLSimpleFunction diff --git a/arm_compute/runtime/CL/functions/CLDerivative.h b/arm_compute/runtime/CL/functions/CLDerivative.h index 1aba6a9f6c..4a91d5d50b 100644 --- a/arm_compute/runtime/CL/functions/CLDerivative.h +++ b/arm_compute/runtime/CL/functions/CLDerivative.h @@ -31,6 +31,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Basic function to execute first order derivative operator. This function calls the following CL kernels: diff --git a/arm_compute/runtime/CL/functions/CLDilate.h b/arm_compute/runtime/CL/functions/CLDilate.h index adb9cf4e6c..bf72cd3b26 100644 --- a/arm_compute/runtime/CL/functions/CLDilate.h +++ b/arm_compute/runtime/CL/functions/CLDilate.h @@ -31,6 +31,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Basic function to execute dilate. 
This function calls the following OpenCL kernels: diff --git a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h index 8107fa24f3..0afc9d3f38 100644 --- a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYER_H #define ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYER_H -#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" #include "arm_compute/runtime/IFunction.h" @@ -34,7 +32,11 @@ namespace arm_compute { +class CLCompileContext; +class CLDirectConvolutionLayerKernel; +class CLFillBorderKernel; class ICLTensor; +class ITensorInfo; /** Basic function to execute direct convolution function: */ @@ -43,6 +45,12 @@ class CLDirectConvolutionLayer : public IFunction public: /** Default constructor */ CLDirectConvolutionLayer(); + /** Prevent instances of this class from being copied */ + CLDirectConvolutionLayer(const CLDirectConvolutionLayer &) = delete; + /** Prevent instances of this class from being copied */ + CLDirectConvolutionLayer &operator=(const CLDirectConvolutionLayer &) = delete; + /** Default destructor */ + ~CLDirectConvolutionLayer(); /** Set the input and output tensors. * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -95,9 +103,9 @@ public: void run() override; private: - CLDirectConvolutionLayerKernel _direct_conv_kernel; - CLFillBorderKernel _input_border_handler; - CLActivationLayer _activationlayer_function; + std::unique_ptr _direct_conv_kernel; + std::unique_ptr _input_border_handler; + CLActivationLayer _activationlayer_function; bool _is_activationlayer_enabled; }; diff --git a/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h b/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h index 5208bfe404..72b5b7dee8 100644 --- a/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h +++ b/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h @@ -29,7 +29,9 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to perform inverse square root on an input tensor. 
*/ class CLRsqrtLayer : public IFunction diff --git a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h index 31d4f2e745..55c5fb3455 100644 --- a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h +++ b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h @@ -30,6 +30,8 @@ namespace arm_compute { class ICLTensor; +class CLCompileContext; +class ITensorInfo; namespace experimental { diff --git a/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h b/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h index 883f330b33..17352d1a9b 100644 --- a/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h +++ b/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h @@ -24,16 +24,19 @@ #ifndef ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H #define ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H -#include "arm_compute/core/CL/kernels/CLHistogramKernel.h" -#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h" #include "arm_compute/runtime/CL/CLDistribution1D.h" #include "arm_compute/runtime/CL/CLLut.h" #include "arm_compute/runtime/IFunction.h" #include +#include namespace arm_compute { +class CLCompileContext; +class CLHistogramKernel; +class CLHistogramBorderKernel; +class CLTableLookupKernel; class ICLTensor; using ICLImage = ICLTensor; @@ -48,6 +51,12 @@ class CLEqualizeHistogram : public IFunction public: /** Default Constructor. */ CLEqualizeHistogram(); + /** Prevent instances of this class from being copied */ + CLEqualizeHistogram(const CLEqualizeHistogram &) = delete; + /** Prevent instances of this class from being copied */ + CLEqualizeHistogram &operator=(const CLEqualizeHistogram &) = delete; + /** Default destructor */ + ~CLEqualizeHistogram(); /** Initialise the kernel's inputs. * * @param[in] input Input image. Data types supported: U8. @@ -66,14 +75,14 @@ public: void run() override; private: - CLHistogramKernel _histogram_kernel; /**< Kernel that calculates the histogram of input. */ - CLHistogramBorderKernel _border_histogram_kernel; /**< Kernel that calculates the histogram on the borders. */ - CLTableLookupKernel _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */ - CLDistribution1D _hist; /**< Distribution that holds the histogram of the input image. */ - CLDistribution1D _cum_dist; /**< Distribution that holds the cummulative distribution of the input histogram. */ - CLLut _cd_lut; /**< Holds the equalization lookuptable. */ - static const uint32_t max_range = 256; /**< Histogram range of the internal histograms. */ - static const uint32_t nr_bins = 256; /**< Histogram bins of the internal histograms. */ + std::unique_ptr _histogram_kernel; /**< Kernel that calculates the histogram of input. */ + std::unique_ptr _border_histogram_kernel; /**< Kernel that calculates the histogram on the borders. */ + std::unique_ptr _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */ + CLDistribution1D _hist; /**< Distribution that holds the histogram of the input image. */ + CLDistribution1D _cum_dist; /**< Distribution that holds the cummulative distribution of the input histogram. */ + CLLut _cd_lut; /**< Holds the equalization lookuptable. */ + static const uint32_t max_range = 256; /**< Histogram range of the internal histograms. */ + static const uint32_t nr_bins = 256; /**< Histogram bins of the internal histograms. 
*/ }; } #endif /*ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H */ diff --git a/arm_compute/runtime/CL/functions/CLErode.h b/arm_compute/runtime/CL/functions/CLErode.h index f8f1c72bc0..9d799bc91e 100644 --- a/arm_compute/runtime/CL/functions/CLErode.h +++ b/arm_compute/runtime/CL/functions/CLErode.h @@ -31,6 +31,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Basic function to execute erode. This function calls the following OpenCL kernels: diff --git a/arm_compute/runtime/CL/functions/CLFFT1D.h b/arm_compute/runtime/CL/functions/CLFFT1D.h index a6a35ab320..31a2cc6b06 100644 --- a/arm_compute/runtime/CL/functions/CLFFT1D.h +++ b/arm_compute/runtime/CL/functions/CLFFT1D.h @@ -26,9 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h" -#include "arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h" -#include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/FunctionDescriptors.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -36,6 +33,9 @@ namespace arm_compute { // Forward declaration +class CLFFTDigitReverseKernel; +class CLFFTRadixStageKernel; +class CLFFTScaleKernel; class ICLTensor; /** Basic function to execute one dimensional FFT. This function calls the following OpenCL kernels: @@ -49,6 +49,12 @@ class CLFFT1D : public IFunction public: /** Default Constructor */ CLFFT1D(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied */ + CLFFT1D(const CLFFT1D &) = delete; + /** Prevent instances of this class from being copied */ + CLFFT1D &operator=(const CLFFT1D &) = delete; + /** Default destructor */ + ~CLFFT1D(); /** Initialise the function's source, destinations and border mode. * * @param[in] input Source tensor. Data types supported: F32. @@ -78,14 +84,14 @@ public: void run() override; protected: - MemoryGroup _memory_group; - CLFFTDigitReverseKernel _digit_reverse_kernel; - std::vector _fft_kernels; - CLFFTScaleKernel _scale_kernel; - CLTensor _digit_reversed_input; - CLTensor _digit_reverse_indices; - unsigned int _num_ffts; - bool _run_scale; + MemoryGroup _memory_group; + std::unique_ptr _digit_reverse_kernel; + std::vector> _fft_kernels; + std::unique_ptr _scale_kernel; + CLTensor _digit_reversed_input; + CLTensor _digit_reverse_indices; + unsigned int _num_ffts; + bool _run_scale; }; } // namespace arm_compute #endif /*ARM_COMPUTE_CLFFT1D_H */ diff --git a/arm_compute/runtime/CL/functions/CLFFT2D.h b/arm_compute/runtime/CL/functions/CLFFT2D.h index 9ceebeaa32..126944b323 100644 --- a/arm_compute/runtime/CL/functions/CLFFT2D.h +++ b/arm_compute/runtime/CL/functions/CLFFT2D.h @@ -46,6 +46,12 @@ class CLFFT2D : public IFunction public: /** Default Constructor */ CLFFT2D(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied */ + CLFFT2D(const CLFFT2D &) = delete; + /** Prevent instances of this class from being copied */ + CLFFT2D &operator=(const CLFFT2D &) = delete; + /** Default destructor */ + ~CLFFT2D(); /** Initialise the function's source, destinations and border mode. * * @param[in] input Source tensor. Data types supported: F32. 
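For context on why CLFFT1D now owns a std::vector of per-stage kernel pointers: the 1D FFT is executed as a chain of radix stages whose factors multiply back to the transform length, with one CLFFTRadixStageKernel configured per stage. The sketch below is only a rough illustration of that decomposition idea; the radix set and the greedy factoring order are assumptions for the example, not the library's actual helper.

// Hypothetical illustration of decomposing an FFT length into radix stages (C++11).
#include <iostream>
#include <vector>

// Greedily factor `n` into the given radices; an empty result means the length
// cannot be decomposed (a real implementation would then reject the configuration).
std::vector<unsigned int> decompose(unsigned int n, const std::vector<unsigned int> &radices)
{
    std::vector<unsigned int> stages;
    for(unsigned int r : radices)
    {
        while(n % r == 0)
        {
            stages.push_back(r);
            n /= r;
        }
    }
    return (n == 1) ? stages : std::vector<unsigned int>{};
}

int main()
{
    const std::vector<unsigned int> supported{ 8, 7, 5, 4, 3, 2 }; // assumed radix set
    for(unsigned int stage : decompose(120, supported))            // 120 = 8 * 5 * 3
    {
        std::cout << "configure one radix-stage kernel with radix " << stage << "\n";
    }
    return 0;
}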
diff --git a/arm_compute/runtime/CL/functions/CLFastCorners.h b/arm_compute/runtime/CL/functions/CLFastCorners.h index 698cc67995..e110582c50 100644 --- a/arm_compute/runtime/CL/functions/CLFastCorners.h +++ b/arm_compute/runtime/CL/functions/CLFastCorners.h @@ -25,7 +25,6 @@ #define ARM_COMPUTE_CLFASTCORNERS_H #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Window.h" #include "arm_compute/runtime/CL/CLArray.h" @@ -40,6 +39,8 @@ namespace arm_compute { +class CLFastCornersKernel; +class CLCopyToArrayKernel; class ICLTensor; using ICLImage = ICLTensor; @@ -59,6 +60,8 @@ public: CLFastCorners(const CLFastCorners &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ const CLFastCorners &operator=(const CLFastCorners &) = delete; + /** Default destructor */ + ~CLFastCorners(); /** Initialize the function's source, destination, conv and border_mode. * * @param[in] input Source image. Data types supported: U8. @@ -88,18 +91,18 @@ public: void run() override; private: - MemoryGroup _memory_group; - CLFastCornersKernel _fast_corners_kernel; - CLNonMaximaSuppression3x3 _suppr_func; - CLCopyToArrayKernel _copy_array_kernel; - CLImage _output; - CLImage _suppr; - Window _win; - bool _non_max; - unsigned int *_num_corners; - cl::Buffer _num_buffer; - ICLKeyPointArray *_corners; - uint8_t _constant_border_value; + MemoryGroup _memory_group; + std::unique_ptr _fast_corners_kernel; + CLNonMaximaSuppression3x3 _suppr_func; + std::unique_ptr _copy_array_kernel; + CLImage _output; + CLImage _suppr; + Window _win; + bool _non_max; + unsigned int *_num_corners; + cl::Buffer _num_buffer; + ICLKeyPointArray *_corners; + uint8_t _constant_border_value; }; } #endif /*ARM_COMPUTE_CLFASTCORNERS_H */ diff --git a/arm_compute/runtime/CL/functions/CLFill.h b/arm_compute/runtime/CL/functions/CLFill.h index b79b234158..fef8324432 100644 --- a/arm_compute/runtime/CL/functions/CLFill.h +++ b/arm_compute/runtime/CL/functions/CLFill.h @@ -30,6 +30,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Function to run @ref CLMemsetKernel to fill a tensor with a scalar value */ diff --git a/arm_compute/runtime/CL/functions/CLFillBorder.h b/arm_compute/runtime/CL/functions/CLFillBorder.h index 18bc20e654..a4ad82dfd4 100644 --- a/arm_compute/runtime/CL/functions/CLFillBorder.h +++ b/arm_compute/runtime/CL/functions/CLFillBorder.h @@ -30,6 +30,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Basic function to run @ref CLFillBorderKernel */ diff --git a/arm_compute/runtime/CL/functions/CLFlattenLayer.h b/arm_compute/runtime/CL/functions/CLFlattenLayer.h index b8139c2260..f5f4ff554f 100644 --- a/arm_compute/runtime/CL/functions/CLFlattenLayer.h +++ b/arm_compute/runtime/CL/functions/CLFlattenLayer.h @@ -29,7 +29,9 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to execute flatten. 
This function calls the following OpenCL kernel: * diff --git a/arm_compute/runtime/CL/functions/CLFloor.h b/arm_compute/runtime/CL/functions/CLFloor.h index 93c3639f89..85d7071194 100644 --- a/arm_compute/runtime/CL/functions/CLFloor.h +++ b/arm_compute/runtime/CL/functions/CLFloor.h @@ -30,7 +30,9 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to run @ref CLFloorKernel */ class CLFloor : public ICLSimpleFunction diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h index 29788742d7..3f17e4a921 100644 --- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h +++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h @@ -26,7 +26,6 @@ #include "arm_compute/runtime/CL/ICLSimpleFunction.h" -#include "arm_compute/core/CL/kernels/CLTransposeKernel.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h" #include "arm_compute/runtime/CL/functions/CLFlattenLayer.h" diff --git a/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h b/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h index de6d5617c2..e35905fcf1 100644 --- a/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h +++ b/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h @@ -24,14 +24,18 @@ #ifndef ARM_COMPUTE_CLFUSEBATCHNORMALIZATION_H #define ARM_COMPUTE_CLFUSEBATCHNORMALIZATION_H -#include "arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" +#include + namespace arm_compute { // Forward declarations +class CLCompileContext; +class CLFuseBatchNormalizationKernel; class ICLTensor; +class ITensorInfo; /** Basic function to fuse the batch normalization node to a preceding convolution node */ class CLFuseBatchNormalization : public IFunction @@ -48,7 +52,7 @@ public: /** Allow instances of this class to be moved */ CLFuseBatchNormalization &operator=(CLFuseBatchNormalization &&) = default; /** Default destructor */ - ~CLFuseBatchNormalization() = default; + ~CLFuseBatchNormalization(); /** Set the input and output tensors. * * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. 
Data layout supported: NCHW, NHWC
@@ -112,7 +116,7 @@ public:
     void run() override;
 
 private:
-    CLFuseBatchNormalizationKernel _fuse_bn_kernel;
+    std::unique_ptr<CLFuseBatchNormalizationKernel> _fuse_bn_kernel;
 };
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_CLFUSEBATCHNORMALIZATION_H */
diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h
index 92f9736e35..0b13e7dbbf 100644
--- a/arm_compute/runtime/CL/functions/CLGEMM.h
+++ b/arm_compute/runtime/CL/functions/CLGEMM.h
@@ -24,11 +24,6 @@
 #ifndef ARM_COMPUTE_CLGEMM_H
 #define ARM_COMPUTE_CLGEMM_H
 
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/CLTypes.h"
 #include "arm_compute/runtime/IFunction.h"
@@ -36,9 +31,18 @@
 #include "arm_compute/runtime/IWeightsManager.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 
+#include <memory>
+
 namespace arm_compute
 {
+class CLCompileContext;
+class CLGEMMReshapeRHSMatrixKernel;
+class CLGEMMMatrixMultiplyKernel;
+class CLGEMMMatrixMultiplyReshapedKernel;
+class CLGEMMMatrixMultiplyReshapedOnlyRHSKernel;
+class CLGEMMReshapeLHSMatrixKernel;
 class ICLTensor;
+class ITensorInfo;
 
 namespace weights_transformations
 {
@@ -46,41 +50,36 @@ namespace weights_transformations
 class CLGEMMReshapeRHSMatrixKernelManaged : public ITransformWeights
 {
 public:
+    /** Default constructor */
+    CLGEMMReshapeRHSMatrixKernelManaged();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLGEMMReshapeRHSMatrixKernelManaged(const CLGEMMReshapeRHSMatrixKernelManaged &) = delete;
+    /** Default move constructor */
+    CLGEMMReshapeRHSMatrixKernelManaged(CLGEMMReshapeRHSMatrixKernelManaged &&) = default;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLGEMMReshapeRHSMatrixKernelManaged &operator=(const CLGEMMReshapeRHSMatrixKernelManaged &) = delete;
+    /** Default move assignment operator */
+    CLGEMMReshapeRHSMatrixKernelManaged &operator=(CLGEMMReshapeRHSMatrixKernelManaged &&) = default;
+    /** Default destructor */
+    ~CLGEMMReshapeRHSMatrixKernelManaged();
     //Inherited method override
-    void run() override
-    {
-        _output.allocator()->allocate();
-        CLScheduler::get().enqueue(_kernel, false);
-        _reshape_run = true;
-    }
+    void run() override;
 
     //Inherited method override
-    void release() override
-    {
-        _output.allocator()->free();
-    }
+    void release() override;
 
     //Inherited method override
-    ICLTensor *get_weights() override
-    {
-        return &_output;
-    }
+    ICLTensor *get_weights() override;
 
     //Inherited method override
-    uint32_t uid() override
-    {
-        return _uid;
-    }
+    uint32_t uid() override;
 
     /** Configures the @ref CLGEMMReshapeRHSMatrixKernel kernel
      *
      * @param[in] input Input tensor. Data types supported: All
     * @param[in] info  RHS matrix information to be used for reshaping.
      */
-    void configure(const ICLTensor *input, GEMMRHSMatrixInfo info)
-    {
-        configure(CLKernelLibrary::get().get_compile_context(), input, info);
-    }
+    void configure(const ICLTensor *input, GEMMRHSMatrixInfo info);
 
     /** Configures the @ref CLGEMMReshapeRHSMatrixKernel kernel
      *
@@ -88,15 +87,12 @@ public:
     * @param[in] input Input tensor.
Data types supported: All * @param[in] info RHS matrix information to be used for reshaping. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, GEMMRHSMatrixInfo info) - { - _kernel.configure(compile_context, input, &_output, info); - } + void configure(const CLCompileContext &compile_context, const ICLTensor *input, GEMMRHSMatrixInfo info); private: - static constexpr uint32_t _uid = 0x15; - CLTensor _output{}; - CLGEMMReshapeRHSMatrixKernel _kernel{}; + static constexpr uint32_t _uid{ 0x15 }; + CLTensor _output{}; + std::unique_ptr _kernel; }; } // namespace weights_transformations @@ -126,6 +122,8 @@ public: CLGEMM &operator=(const CLGEMM &) = delete; /** Default move assignment operator */ CLGEMM &operator=(CLGEMM &&) = default; + /** Default destructor */ + ~CLGEMM(); /** Initialise the kernel's inputs and output * * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. @@ -198,24 +196,24 @@ private: static Status validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); static Status validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - CLGEMMMatrixMultiplyKernel _mm_kernel; - CLGEMMReshapeLHSMatrixKernel _reshape_lhs_kernel; - CLGEMMReshapeRHSMatrixKernel _reshape_rhs_kernel; - weights_transformations::CLGEMMReshapeRHSMatrixKernelManaged _reshape_rhs_kernel_managed; - CLGEMMMatrixMultiplyReshapedKernel _mm_reshaped_kernel; - CLGEMMMatrixMultiplyReshapedOnlyRHSKernel _mm_reshaped_only_rhs_kernel; - CLGEMMMatrixMultiplyReshapedOnlyRHSKernel _mm_reshaped_only_rhs_fallback_kernel; - CLTensor _tmp_a; - CLTensor _tmp_b; - const ICLTensor *_original_b; - const ICLTensor *_lhs; - ICLTensor *_dst; - bool _reshape_b_only_on_first_run; - bool _is_prepared; - bool _has_pad_y; - CLGEMMKernelType _gemm_kernel_type; + MemoryGroup _memory_group; + IWeightsManager *_weights_manager; + std::unique_ptr _mm_kernel; + std::unique_ptr _reshape_lhs_kernel; + std::unique_ptr _reshape_rhs_kernel; + std::unique_ptr _reshape_rhs_kernel_managed; + std::unique_ptr _mm_reshaped_kernel; + std::unique_ptr _mm_reshaped_only_rhs_kernel; + std::unique_ptr _mm_reshaped_only_rhs_fallback_kernel; + CLTensor _tmp_a; + CLTensor _tmp_b; + const ICLTensor *_original_b; + const ICLTensor *_lhs; + ICLTensor *_dst; + bool _reshape_b_only_on_first_run; + bool _is_prepared; + bool _has_pad_y; + CLGEMMKernelType _gemm_kernel_type; }; } // namespace arm_compute diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h index 467045cd86..340ac6e749 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h @@ -26,9 +26,7 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h" -#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h" -#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" @@ -43,6 +41,9 @@ namespace arm_compute { +class CLCol2ImKernel; +class CLIm2ColKernel; +class CLWeightsReshapeKernel; 
 class ICLTensor;
 
 /** Function to reshape and transpose the weights. This function calls the following kernels:
@@ -53,6 +54,16 @@ public:
     /** Constructor */
     CLConvolutionLayerReshapeWeights();
+    /** Prevent instances of this class from being copied */
+    CLConvolutionLayerReshapeWeights(const CLConvolutionLayerReshapeWeights &) = delete;
+    /** Prevent instances of this class from being copied */
+    CLConvolutionLayerReshapeWeights &operator=(const CLConvolutionLayerReshapeWeights &) = delete;
+    /** Prevent instances of this class from being moved */
+    CLConvolutionLayerReshapeWeights(CLConvolutionLayerReshapeWeights &&) = delete;
+    /** Prevent instances of this class from being moved */
+    CLConvolutionLayerReshapeWeights &operator=(CLConvolutionLayerReshapeWeights &&) = delete;
+    /** Default destructor */
+    ~CLConvolutionLayerReshapeWeights();
     /** Set the input and output tensors.
      *
      * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
@@ -87,7 +98,7 @@ public:
     void run() override;
 
 private:
-    CLWeightsReshapeKernel _weights_reshape_kernel;
+    std::unique_ptr<CLWeightsReshapeKernel> _weights_reshape_kernel;
 };
 
 namespace weights_transformations
@@ -179,6 +190,8 @@ public:
     CLGEMMConvolutionLayer &operator=(const CLGEMMConvolutionLayer &) = delete;
     /** Default move assignment operator */
     CLGEMMConvolutionLayer &operator=(CLGEMMConvolutionLayer &&) = default;
+    /** Default destructor */
+    ~CLGEMMConvolutionLayer();
     /** Set the input and output tensors.
      *
      * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -288,10 +301,10 @@ private:
     IWeightsManager   *_weights_manager;
     CLConvolutionLayerReshapeWeights _reshape_weights;
     weights_transformations::CLConvolutionLayerReshapeWeightsTransform _reshape_weights_managed;
-    CLIm2ColKernel _im2col_kernel;
+    std::unique_ptr<CLIm2ColKernel> _im2col_kernel;
     CLGEMM _mm_gemm;
     CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
-    CLCol2ImKernel _col2im_kernel;
+    std::unique_ptr<CLCol2ImKernel> _col2im_kernel;
     CLActivationLayer _activationlayer_function;
 
     const ICLTensor *_original_weights;
diff --git a/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h
index 1fedeff444..32af0f9427 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h
@@ -24,7 +24,6 @@
 #ifndef ARM_COMPUTE_CLGEMMDECONVOLUTIONLAYER_H
 #define ARM_COMPUTE_CLGEMMDECONVOLUTIONLAYER_H
 
-#include "arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
 #include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
@@ -40,6 +39,7 @@
 namespace arm_compute
 {
+class CLDeconvolutionReshapeOutputKernel;
 class ICLTensor;
 /** Function to run the deconvolution layer through a call to GEMM.
  *
@@ -89,6 +89,8 @@ public:
     CLGEMMDeconvolutionLayer &operator=(const CLGEMMDeconvolutionLayer &) = delete;
     /** Default move assignment operator */
     CLGEMMDeconvolutionLayer &operator=(CLGEMMDeconvolutionLayer &&) = default;
+    /** Default destructor */
+    ~CLGEMMDeconvolutionLayer();
     /** Set the input, weights, biases and output tensors.
      *
     * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.
@@ -130,15 +132,15 @@ public: private: MemoryGroup _memory_group; - CLGEMM _mm_gemm; - CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp; - CLGEMMLowpOutputStage _gemmlowp_output_stage; - CLPermute _permute_input_to_nhwc; - CLPermute _permute_weights_to_nhwc; - CLReshapeLayer _reshape_weights; - CLTranspose _transpose_weights; - CLDeconvolutionReshapeOutputKernel _deconv_reshape; - CLSlice _slice_gemm; + CLGEMM _mm_gemm; + CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp; + CLGEMMLowpOutputStage _gemmlowp_output_stage; + CLPermute _permute_input_to_nhwc; + CLPermute _permute_weights_to_nhwc; + CLReshapeLayer _reshape_weights; + CLTranspose _transpose_weights; + std::unique_ptr _deconv_reshape; + CLSlice _slice_gemm; CLTensor _gemmlowp_final; CLTensor _reshaped_weights; diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h index 57b1e30df5..4cc8899690 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h +++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h @@ -24,21 +24,24 @@ #ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H #define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H -#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/MemoryGroup.h" namespace arm_compute { +class CLCompileContext; class IMemoryManager; class ICLTensor; +class ITensorInfo; +class CLDepthConvertLayerKernel; +class CLGEMMLowpMatrixMultiplyNativeKernel; +class CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel; +class CLGEMMLowpOffsetContributionKernel; +class CLGEMMLowpOffsetContributionOutputStageKernel; +class CLGEMMLowpMatrixAReductionKernel; +class CLGEMMLowpMatrixBReductionKernel; +class CLGEMMReshapeRHSMatrixKernel; /** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. */ class CLGEMMLowpMatrixMultiplyCore : public IFunction @@ -54,6 +57,8 @@ public: CLGEMMLowpMatrixMultiplyCore &operator=(const CLGEMMLowpMatrixMultiplyCore &) = delete; /** Default move assignment operator */ CLGEMMLowpMatrixMultiplyCore &operator=(CLGEMMLowpMatrixMultiplyCore &&) = default; + /** Default destructor */ + ~CLGEMMLowpMatrixMultiplyCore(); /** Initialise the kernel's inputs, output * * @note GEMMLowp: low precision GEMM kernel. 
[A * B + C] @@ -112,14 +117,14 @@ private: MemoryGroup _memory_group; // Kernels used - CLDepthConvertLayerKernel _weights_to_qasymm8; - CLGEMMLowpMatrixMultiplyNativeKernel _mm_native_kernel; - CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel _mm_reshaped_only_rhs_kernel; - CLGEMMReshapeRHSMatrixKernel _mtx_b_reshape_kernel; - CLGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel; - CLGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel; - CLGEMMLowpOffsetContributionKernel _offset_contribution_kernel; - CLGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel; + std::unique_ptr _weights_to_qasymm8; + std::unique_ptr _mm_native_kernel; + std::unique_ptr _mm_reshaped_only_rhs_kernel; + std::unique_ptr _mtx_b_reshape_kernel; + std::unique_ptr _mtx_a_reduction_kernel; + std::unique_ptr _mtx_b_reduction_kernel; + std::unique_ptr _offset_contribution_kernel; + std::unique_ptr _offset_contribution_output_stage_kernel; // Temporary tensors CLTensor _qasymm8_weights; diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h index 44c52ffb79..a4edab9b8f 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h +++ b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h @@ -24,8 +24,11 @@ #ifndef ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H #define ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" +#include + /** This file contains all available output stages for GEMMLowp on OpenCL. * * In gemmlowp, the "output stage" is the process that takes a final int32 accumulator value (the output of @ref CLGEMMLowpMatrixMultiplyCore), @@ -36,7 +39,11 @@ namespace arm_compute { +class CLCompileContext; class ITensor; +class ICLTensor; +class ITensorInfo; +struct GEMMLowpOutputStageInfo; /** Basic function to execute CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on OpenCL. * diff --git a/arm_compute/runtime/CL/functions/CLGather.h b/arm_compute/runtime/CL/functions/CLGather.h index e87a120ba1..9c659be6fc 100644 --- a/arm_compute/runtime/CL/functions/CLGather.h +++ b/arm_compute/runtime/CL/functions/CLGather.h @@ -25,11 +25,14 @@ #ifndef ARM_COMPUTE_CLGATHER_H #define ARM_COMPUTE_CLGATHER_H +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to run @ref CLGatherKernel */ class CLGather : public ICLSimpleFunction diff --git a/arm_compute/runtime/CL/functions/CLGaussian3x3.h b/arm_compute/runtime/CL/functions/CLGaussian3x3.h index 9fe3e9bb00..286a17618b 100644 --- a/arm_compute/runtime/CL/functions/CLGaussian3x3.h +++ b/arm_compute/runtime/CL/functions/CLGaussian3x3.h @@ -31,6 +31,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Basic function to execute gaussian filter 3x3. 
This function calls the following OpenCL kernels: diff --git a/arm_compute/runtime/CL/functions/CLGaussian5x5.h b/arm_compute/runtime/CL/functions/CLGaussian5x5.h index fb369d750b..cf5b79eaac 100644 --- a/arm_compute/runtime/CL/functions/CLGaussian5x5.h +++ b/arm_compute/runtime/CL/functions/CLGaussian5x5.h @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_CLGAUSSIAN5X5_H #define ARM_COMPUTE_CLGAUSSIAN5X5_H -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" -#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/IFunction.h" @@ -37,6 +35,10 @@ namespace arm_compute { +class CLCompileContext; +class CLFillBorderKernel; +class CLGaussian5x5HorKernel; +class CLGaussian5x5VertKernel; class ICLTensor; /** Basic function to execute gaussian filter 5x5. This function calls the following OpenCL kernels: @@ -54,6 +56,16 @@ public: * @param[in] memory_manager (Optional) Memory manager. */ CLGaussian5x5(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied */ + CLGaussian5x5(const CLGaussian5x5 &) = delete; + /** Default move constructor */ + CLGaussian5x5(CLGaussian5x5 &&) = default; + /** Prevent instances of this class from being copied */ + CLGaussian5x5 &operator=(const CLGaussian5x5 &) = delete; + /** Default move assignment operator */ + CLGaussian5x5 &operator=(CLGaussian5x5 &&) = default; + /** Default destructor */ + ~CLGaussian5x5(); /** Initialise the function's source, destinations and border mode. * * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) @@ -76,11 +88,11 @@ public: void run() override; protected: - MemoryGroup _memory_group; /**< Function's memory group */ - CLGaussian5x5HorKernel _kernel_hor; /**< Horizontal pass kernel */ - CLGaussian5x5VertKernel _kernel_vert; /**< Vertical pass kernel */ - CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */ - CLImage _tmp; /**< Temporary buffer */ + MemoryGroup _memory_group; /**< Function's memory group */ + std::unique_ptr _kernel_hor; /**< Horizontal pass kernel */ + std::unique_ptr _kernel_vert; /**< Vertical pass kernel */ + std::unique_ptr _border_handler; /**< Kernel to handle image borders */ + CLImage _tmp; /**< Temporary buffer */ }; } #endif /*ARM_COMPUTE_CLGAUSSIAN5X5_H */ diff --git a/arm_compute/runtime/CL/functions/CLGaussianPyramid.h b/arm_compute/runtime/CL/functions/CLGaussianPyramid.h index 70f324be11..b18e5f98f0 100644 --- a/arm_compute/runtime/CL/functions/CLGaussianPyramid.h +++ b/arm_compute/runtime/CL/functions/CLGaussianPyramid.h @@ -24,9 +24,6 @@ #ifndef ARM_COMPUTE_CLGAUSSIANPYRAMID_H #define ARM_COMPUTE_CLGAUSSIANPYRAMID_H -#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h" - -#include "arm_compute/core/CL/kernels/CLScaleKernel.h" #include "arm_compute/core/IPyramid.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLPyramid.h" @@ -38,7 +35,12 @@ namespace arm_compute { +class CLCompileContext; +class CLFillBorderKernel; class ICLTensor; +class CLGaussianPyramidHorKernel; +class CLGaussianPyramidVertKernel; +class CLScaleKernel; /** Common interface for all Gaussian pyramid functions */ class CLGaussianPyramid : public IFunction @@ -55,7 +57,7 @@ public: /** Allow instances of this class to be moved */ CLGaussianPyramid &operator=(CLGaussianPyramid &&) = default; /** Default destructor */ - virtual ~CLGaussianPyramid() = default; + 
~CLGaussianPyramid();
     /** Initialise the function's source, destinations and border mode.
      *
      * @param[in, out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
@@ -93,6 +95,12 @@ class CLGaussianPyramidHalf : public CLGaussianPyramid
 public:
     /** Constructor */
     CLGaussianPyramidHalf();
+    /** Prevent instances of this class from being copied */
+    CLGaussianPyramidHalf(const CLGaussianPyramidHalf &) = delete;
+    /** Prevent instances of this class from being copied */
+    CLGaussianPyramidHalf &operator=(const CLGaussianPyramidHalf &) = delete;
+    /** Default destructor */
+    ~CLGaussianPyramidHalf();
 
     // Inherited methods overridden:
     void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
@@ -100,10 +108,10 @@ public:
     void run() override;
 
 private:
-    std::vector<CLFillBorderKernel>          _horizontal_border_handler;
-    std::vector<CLFillBorderKernel>          _vertical_border_handler;
-    std::vector<CLGaussianPyramidHorKernel>  _horizontal_reduction;
-    std::vector<CLGaussianPyramidVertKernel> _vertical_reduction;
+    std::vector<std::unique_ptr<CLFillBorderKernel>>          _horizontal_border_handler;
+    std::vector<std::unique_ptr<CLFillBorderKernel>>          _vertical_border_handler;
+    std::vector<std::unique_ptr<CLGaussianPyramidHorKernel>>  _horizontal_reduction;
+    std::vector<std::unique_ptr<CLGaussianPyramidVertKernel>> _vertical_reduction;
 };
 
 /** Basic function to execute gaussian pyramid with ORB scale factor. This function calls the following OpenCL kernels and functions:
@@ -124,8 +132,8 @@ public:
     void run() override;
 
 private:
-    std::vector<CLGaussian5x5> _gauss5x5;
-    std::vector<CLScaleKernel> _scale_nearest;
+    std::vector<CLGaussian5x5>                  _gauss5x5;
+    std::vector<std::unique_ptr<CLScaleKernel>> _scale_nearest;
 };
 }
 #endif /*ARM_COMPUTE_CLGAUSSIANPYRAMID_H */
diff --git a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
index 6d5f2e5d71..0fb9a06c84 100644
--- a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
@@ -23,12 +23,7 @@
  */
 #ifndef ARM_COMPUTE_CLGENERATEPROPOSALSLAYER_H
 #define ARM_COMPUTE_CLGENERATEPROPOSALSLAYER_H
-#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h"
-#include "arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLPermuteKernel.h"
-#include "arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h"
+
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
@@ -38,9 +33,19 @@
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 
+#include <memory>
+
 namespace arm_compute
 {
+class CLCompileContext;
+class CLBoundingBoxTransformKernel;
+class CLDequantizationLayerKernel;
+class CLComputeAllAnchorsKernel;
+class CLPadLayerKernel;
+class CLPermuteKernel;
+class CLQuantizationLayerKernel;
 class ICLTensor;
+class ITensorInfo;
 
 /** Basic function to generate proposals for a RPN (Region Proposal Network)
  *
@@ -67,6 +72,8 @@ public:
     CLGenerateProposalsLayer(const CLGenerateProposalsLayer &) = delete;
     /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLGenerateProposalsLayer &operator=(const CLGenerateProposalsLayer &) = delete;
+    /** Default destructor */
+    ~CLGenerateProposalsLayer();
     /** Set the input and output tensors.
* @@ -130,16 +137,16 @@ private: MemoryGroup _memory_group; // OpenCL kernels - CLPermuteKernel _permute_deltas_kernel; - CLReshapeLayer _flatten_deltas; - CLPermuteKernel _permute_scores_kernel; - CLReshapeLayer _flatten_scores; - CLComputeAllAnchorsKernel _compute_anchors_kernel; - CLBoundingBoxTransformKernel _bounding_box_kernel; - CLPadLayerKernel _pad_kernel; - CLDequantizationLayerKernel _dequantize_anchors; - CLDequantizationLayerKernel _dequantize_deltas; - CLQuantizationLayerKernel _quantize_all_proposals; + std::unique_ptr _permute_deltas_kernel; + CLReshapeLayer _flatten_deltas; + std::unique_ptr _permute_scores_kernel; + CLReshapeLayer _flatten_scores; + std::unique_ptr _compute_anchors_kernel; + std::unique_ptr _bounding_box_kernel; + std::unique_ptr _pad_kernel; + std::unique_ptr _dequantize_anchors; + std::unique_ptr _dequantize_deltas; + std::unique_ptr _quantize_all_proposals; // CPP functions CPPBoxWithNonMaximaSuppressionLimit _cpp_nms; diff --git a/arm_compute/runtime/CL/functions/CLHOGDescriptor.h b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h index dad7e6edf8..fa37b3c84e 100644 --- a/arm_compute/runtime/CL/functions/CLHOGDescriptor.h +++ b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_CLHOGDESCRIPTOR_H #define ARM_COMPUTE_CLHOGDESCRIPTOR_H -#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLHOGGradient.h" @@ -37,6 +36,8 @@ namespace arm_compute { class IHOG; +class CLHOGOrientationBinningKernel; +class CLHOGBlockNormalizationKernel; /** Basic function to calculate HOG descriptor. This function calls the following OpenCL kernels: * * -# @ref CLHOGGradient @@ -49,6 +50,12 @@ class CLHOGDescriptor : public IFunction public: /** Default constructor */ CLHOGDescriptor(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied */ + CLHOGDescriptor(const CLHOGDescriptor &) = delete; + /** Prevent instances of this class from being copied */ + CLHOGDescriptor &operator=(const CLHOGDescriptor &) = delete; + /** Default destructor */ + ~CLHOGDescriptor(); /** Initialise the function's source, destination, HOG data-object and border mode * * @param[in, out] input Input tensor. 
Data type supported: U8 @@ -75,13 +82,13 @@ public: void run() override; private: - MemoryGroup _memory_group; - CLHOGGradient _gradient; - CLHOGOrientationBinningKernel _orient_bin; - CLHOGBlockNormalizationKernel _block_norm; - CLTensor _mag; - CLTensor _phase; - CLTensor _hog_space; + MemoryGroup _memory_group; + CLHOGGradient _gradient; + std::unique_ptr _orient_bin; + std::unique_ptr _block_norm; + CLTensor _mag; + CLTensor _phase; + CLTensor _hog_space; }; } diff --git a/arm_compute/runtime/CL/functions/CLHOGDetector.h b/arm_compute/runtime/CL/functions/CLHOGDetector.h index 6697b5c24d..edc5b652d3 100644 --- a/arm_compute/runtime/CL/functions/CLHOGDetector.h +++ b/arm_compute/runtime/CL/functions/CLHOGDetector.h @@ -24,13 +24,20 @@ #ifndef ARM_COMPUTE_CLHOGDETECTOR_H #define ARM_COMPUTE_CLHOGDETECTOR_H +#include "arm_compute/core/CL/ICLArray.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h" #include "arm_compute/core/IHOG.h" #include "arm_compute/runtime/IFunction.h" +#include + namespace arm_compute { +class CLCompileContext; +class CLHOGDetectorKernel; +class ICLTensor; +class ICLHOG; + /** Basic function to execute HOG detector based on linear SVM. This function calls the following OpenCL kernel: * * -# @ref CLHOGDetectorKernel @@ -50,7 +57,7 @@ public: /** Allow instances of this class to be moved */ CLHOGDetector &operator=(CLHOGDetector &&) = default; /** Default destructor */ - ~CLHOGDetector() = default; + ~CLHOGDetector(); /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class * * @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is caller's responsibility to clear it. @@ -78,16 +85,16 @@ public: * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to */ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, - float threshold = 0.0f, + float threshold = 0.0f, size_t idx_class = 0); // Inherited methods overridden: void run() override; private: - CLHOGDetectorKernel _hog_detector_kernel; - ICLDetectionWindowArray *_detection_windows; - cl::Buffer _num_detection_windows; + std::unique_ptr _hog_detector_kernel; + ICLDetectionWindowArray *_detection_windows; + cl::Buffer _num_detection_windows; }; } diff --git a/arm_compute/runtime/CL/functions/CLHOGGradient.h b/arm_compute/runtime/CL/functions/CLHOGGradient.h index b0589027e7..39d26fb110 100644 --- a/arm_compute/runtime/CL/functions/CLHOGGradient.h +++ b/arm_compute/runtime/CL/functions/CLHOGGradient.h @@ -24,9 +24,6 @@ #ifndef ARM_COMPUTE_CLHOGGRADIENT_H #define ARM_COMPUTE_CLHOGGRADIENT_H -#include "arm_compute/core/CL/ICLKernel.h" - -#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLDerivative.h" @@ -39,6 +36,9 @@ namespace arm_compute { +class CLCompileContext; +class CLMagnitudePhaseKernel; +class ITensorInfo; /** Basic function to calculate the gradient for HOG. 
This function calls the following OpenCL kernels: * * -# @ref CLDerivative @@ -79,11 +79,11 @@ public: void run() override; private: - MemoryGroup _memory_group; - CLDerivative _derivative; - CLMagnitudePhaseKernel _mag_phase; - CLTensor _gx; - CLTensor _gy; + MemoryGroup _memory_group; + CLDerivative _derivative; + std::unique_ptr _mag_phase; + CLTensor _gx; + CLTensor _gy; }; } #endif /*ARM_COMPUTE_CLHOGGRADIENT_H */ diff --git a/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h index e7631c2c5a..2a2c9a0a5c 100644 --- a/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h +++ b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h @@ -26,7 +26,6 @@ #include "arm_compute/core/CL/ICLArray.h" #include "arm_compute/core/CL/ICLMultiHOG.h" -#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h" #include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLHOGDetector.h" @@ -39,6 +38,9 @@ namespace arm_compute { +class CLCompileContext; +class CLHOGOrientationBinningKernel; +class CLHOGBlockNormalizationKernel; /** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following kernels: * * -# @ref CLHOGGradient @@ -62,6 +64,8 @@ public: CLHOGMultiDetection(const CLHOGMultiDetection &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ CLHOGMultiDetection &operator=(const CLHOGMultiDetection &) = delete; + /** Default destructor */ + ~CLHOGMultiDetection(); /** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression * * @param[in, out] input Input tensor. 
Data type supported: U8 @@ -110,21 +114,21 @@ public: void run() override; private: - MemoryGroup _memory_group; - CLHOGGradient _gradient_kernel; - std::vector _orient_bin_kernel; - std::vector _block_norm_kernel; - std::vector _hog_detect_kernel; - CPPDetectionWindowNonMaximaSuppressionKernel _non_maxima_kernel; - std::vector _hog_space; - std::vector _hog_norm_space; - ICLDetectionWindowArray *_detection_windows; - CLTensor _mag; - CLTensor _phase; - bool _non_maxima_suppression; - size_t _num_orient_bin_kernel; - size_t _num_block_norm_kernel; - size_t _num_hog_detect_kernel; + MemoryGroup _memory_group; + CLHOGGradient _gradient_kernel; + std::vector> _orient_bin_kernel; + std::vector> _block_norm_kernel; + std::vector _hog_detect_kernel; + CPPDetectionWindowNonMaximaSuppressionKernel _non_maxima_kernel; + std::vector _hog_space; + std::vector _hog_norm_space; + ICLDetectionWindowArray *_detection_windows; + CLTensor _mag; + CLTensor _phase; + bool _non_maxima_suppression; + size_t _num_orient_bin_kernel; + size_t _num_block_norm_kernel; + size_t _num_hog_detect_kernel; }; } diff --git a/arm_compute/runtime/CL/functions/CLHarrisCorners.h b/arm_compute/runtime/CL/functions/CLHarrisCorners.h index 326a895d39..c9c67f5a28 100644 --- a/arm_compute/runtime/CL/functions/CLHarrisCorners.h +++ b/arm_compute/runtime/CL/functions/CLHarrisCorners.h @@ -24,16 +24,13 @@ #ifndef ARM_COMPUTE_CLHARRISCORNERS_H #define ARM_COMPUTE_CLHARRISCORNERS_H -#include "arm_compute/runtime/IFunction.h" - #include "arm_compute/core/CL/ICLArray.h" -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" -#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h" #include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" #include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h" +#include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" #include @@ -41,6 +38,9 @@ namespace arm_compute { +class CLCompileContext; +class CLHarrisScoreKernel; +class CLFillBorderKernel; class ICLTensor; using ICLImage = ICLTensor; @@ -66,6 +66,8 @@ public: CLHarrisCorners(const CLHarrisCorners &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ const CLHarrisCorners &operator=(const CLHarrisCorners &) = delete; + /** Default destructor */ + ~CLHarrisCorners(); /** Initialize the function's source, destination, conv and border_mode. * * @param[in,out] input Source image. Data types supported: U8. 
(Written to only for @p border_mode != UNDEFINED) @@ -104,21 +106,21 @@ public: void run() override; private: - MemoryGroup _memory_group; /**< Function's memory group */ - std::unique_ptr _sobel; /**< Sobel function */ - CLHarrisScoreKernel _harris_score; /**< Harris score kernel */ - CLNonMaximaSuppression3x3 _non_max_suppr; /**< Non-maxima suppression function */ - CPPCornerCandidatesKernel _candidates; /**< Sort kernel */ - CPPSortEuclideanDistanceKernel _sort_euclidean; /**< Euclidean distance kernel */ - CLFillBorderKernel _border_gx; /**< Border handler before running harris score */ - CLFillBorderKernel _border_gy; /**< Border handler before running harris score */ - CLImage _gx; /**< Source image - Gx component */ - CLImage _gy; /**< Source image - Gy component */ - CLImage _score; /**< Source image - Harris score */ - CLImage _nonmax; /**< Source image - Non-Maxima suppressed image */ - std::vector _corners_list; /**< Array of InternalKeypoint. It stores the potential corner candidates */ - int32_t _num_corner_candidates; /**< Number of potential corner candidates */ - ICLKeyPointArray *_corners; /**< Output corners array */ + MemoryGroup _memory_group; /**< Function's memory group */ + std::unique_ptr _sobel; /**< Sobel function */ + std::unique_ptr _harris_score; /**< Harris score kernel */ + CLNonMaximaSuppression3x3 _non_max_suppr; /**< Non-maxima suppression function */ + CPPCornerCandidatesKernel _candidates; /**< Sort kernel */ + CPPSortEuclideanDistanceKernel _sort_euclidean; /**< Euclidean distance kernel */ + std::unique_ptr _border_gx; /**< Border handler before running harris score */ + std::unique_ptr _border_gy; /**< Border handler before running harris score */ + CLImage _gx; /**< Source image - Gx component */ + CLImage _gy; /**< Source image - Gy component */ + CLImage _score; /**< Source image - Harris score */ + CLImage _nonmax; /**< Source image - Non-Maxima suppressed image */ + std::vector _corners_list; /**< Array of InternalKeypoint. It stores the potential corner candidates */ + int32_t _num_corner_candidates; /**< Number of potential corner candidates */ + ICLKeyPointArray *_corners; /**< Output corners array */ }; } #endif /*ARM_COMPUTE_CLHARRISCORNERS_H */ diff --git a/arm_compute/runtime/CL/functions/CLHistogram.h b/arm_compute/runtime/CL/functions/CLHistogram.h index 7fdb8a9022..164bd0a28a 100644 --- a/arm_compute/runtime/CL/functions/CLHistogram.h +++ b/arm_compute/runtime/CL/functions/CLHistogram.h @@ -24,8 +24,8 @@ #ifndef ARM_COMPUTE_CLHISTOGRAM_H #define ARM_COMPUTE_CLHISTOGRAM_H -#include "arm_compute/core/CL/kernels/CLHistogramKernel.h" #include "arm_compute/runtime/IFunction.h" +#include "src/core/CL/kernels/CLHistogramKernel.h" namespace arm_compute { diff --git a/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h index d7aa11cbc8..d41f3fedf6 100644 --- a/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h @@ -24,11 +24,14 @@ #ifndef ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYER_H #define ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYER_H +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to perform a Instance normalization. 
* diff --git a/arm_compute/runtime/CL/functions/CLIntegralImage.h b/arm_compute/runtime/CL/functions/CLIntegralImage.h index 6b10ede650..0ecdbde8fe 100644 --- a/arm_compute/runtime/CL/functions/CLIntegralImage.h +++ b/arm_compute/runtime/CL/functions/CLIntegralImage.h @@ -24,11 +24,15 @@ #ifndef ARM_COMPUTE_CLINTEGRALIMAGE_H #define ARM_COMPUTE_CLINTEGRALIMAGE_H -#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h" #include "arm_compute/runtime/IFunction.h" +#include + namespace arm_compute { +class CLCompileContext; +class CLIntegralImageHorKernel; +class CLIntegralImageVertKernel; class ICLTensor; /** Basic function to execute integral image. This function calls the following OpenCL kernels: @@ -42,6 +46,12 @@ class CLIntegralImage : public IFunction public: /** Default Constructor. */ CLIntegralImage(); + /** Prevent instances of this class from being copied */ + CLIntegralImage(const CLIntegralImage &) = delete; + /** Prevent instances of this class from being copied */ + CLIntegralImage &operator=(const CLIntegralImage &) = delete; + /** Default destructor */ + ~CLIntegralImage(); /** Initialise the function's source, destinations and border mode. * * @param[in] input Source tensor. Data types supported: U8. @@ -60,8 +70,8 @@ public: void run() override; protected: - CLIntegralImageHorKernel _integral_hor; /**< Integral Image Horizontal kernel */ - CLIntegralImageVertKernel _integral_vert; /**< Integral Image Vertical kernel */ + std::unique_ptr _integral_hor; /**< Integral Image Horizontal kernel */ + std::unique_ptr _integral_vert; /**< Integral Image Vertical kernel */ }; } #endif /*ARM_COMPUTE_CLINTEGRALIMAGE_H */ diff --git a/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h index bc79101d9d..401d249eb4 100644 --- a/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h +++ b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_CLL2NORMALIZELAYER_H #define ARM_COMPUTE_CLL2NORMALIZELAYER_H -#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" @@ -37,7 +36,10 @@ namespace arm_compute { +class CLCompileContext; +class CLL2NormalizeLayerKernel; class ICLTensor; +class ITensorInfo; /** Basic function to perform a L2 normalization on a given axis. * @@ -50,6 +52,16 @@ class CLL2NormalizeLayer : public IFunction public: /** Constructor */ CLL2NormalizeLayer(std::shared_ptr memory_manager = nullptr); + /** Default Destructor */ + ~CLL2NormalizeLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLL2NormalizeLayer(const CLL2NormalizeLayer &) = delete; + /** Default move constructor */ + CLL2NormalizeLayer(CLL2NormalizeLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLL2NormalizeLayer &operator=(const CLL2NormalizeLayer &) = delete; + /** Default move assignment operator */ + CLL2NormalizeLayer &operator=(CLL2NormalizeLayer &&) = default; /** Set the input and output tensors. 
* @@ -84,10 +96,10 @@ public: void run() override; private: - MemoryGroup _memory_group; - CLReductionOperation _reduce_func; - CLL2NormalizeLayerKernel _normalize_kernel; - CLTensor _sumsq; + MemoryGroup _memory_group; + CLReductionOperation _reduce_func; + std::unique_ptr _normalize_kernel; + CLTensor _sumsq; }; } #endif /*ARM_COMPUTE_CLL2NORMALIZELAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h index 1a8b33463d..017f26aa1e 100644 --- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h +++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h @@ -26,8 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/CL/kernels/CLCopyKernel.h" -#include "arm_compute/core/CL/kernels/CLMemsetKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" @@ -45,6 +43,10 @@ namespace arm_compute { +class CLCompileContext; +class CLCopyKernel; +class CLMemsetKernel; +class CLTransposeKernel; class ICLTensor; /** This function performs a single time step in a Long Short-Term Memory (LSTM) layer. @@ -55,6 +57,16 @@ class CLLSTMLayer : public IFunction public: /** Default constructor */ CLLSTMLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied */ + CLLSTMLayer(const CLLSTMLayer &) = delete; + /** Prevent instances of this class from being copied */ + CLLSTMLayer &operator=(const CLLSTMLayer &) = delete; + /** Prevent instances of this class to be moved */ + CLLSTMLayer(CLLSTMLayer &&) = delete; + /** Prevent instances of this class to be moved */ + CLLSTMLayer &operator=(CLLSTMLayer &&) = delete; + /** Default destructor */ + ~CLLSTMLayer(); /** Initialize function's tensors. * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32. 
@@ -200,90 +212,90 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - CLFullyConnectedLayer _fully_connected_input_gate; - CLArithmeticAddition _accum_input_gate1; - CLArithmeticSubtraction _subtract_input_gate; - CLPixelWiseMultiplication _pixelwise_mul_input_gate; - CLActivationLayer _activation_input_gate; - CLFullyConnectedLayer _fully_connected_forget_gate; - CLArithmeticAddition _accum_forget_gate1; - CLPixelWiseMultiplication _pixelwise_mul_forget_gate; - CLActivationLayer _activation_forget_gate; - CLFullyConnectedLayer _fully_connected_cell_state; - CLGEMM _gemm_cell_state1; - CLTransposeKernel _transpose_cell_state; - CLArithmeticAddition _accum_cell_state1; - CLArithmeticAddition _accum_cell_state2; - CLPixelWiseMultiplication _pixelwise_mul_cell_state1; - CLActivationLayer _activation_cell_state; - CLActivationLayer _cell_clip; - CLPixelWiseMultiplication _pixelwise_mul_cell_state2; - CLFullyConnectedLayer _fully_connected_output; - CLPixelWiseMultiplication _pixelwise_mul_output_state1; - CLArithmeticAddition _accum_output1; - CLActivationLayer _activation_output; - CLActivationLayer _activation_output_state; - CLPixelWiseMultiplication _pixelwise_mul_output_state2; - CLFullyConnectedLayer _fully_connected_output_state; - CLActivationLayer _projection_clip; - CLCopyKernel _copy_cell_state; - CLCopyKernel _copy_output; - CLConcatenateLayer _concat_scratch_buffer; - CLConcatenateLayer _concat_inputs_forget_gate; - CLConcatenateLayer _concat_weights_forget_gate; - CLConcatenateLayer _concat_weights_input_gate; - CLConcatenateLayer _concat_weights_output; - CLMemsetKernel _ones_memset_kernel; - CLMeanStdDevNormalizationLayer _mean_std_norm_input_gate; - CLPixelWiseMultiplication _pixelwise_mul_input_gate_coeff; - CLArithmeticAddition _accum_input_gate_bias; - CLMeanStdDevNormalizationLayer _mean_std_norm_forget_gate; - CLPixelWiseMultiplication _pixelwise_mul_forget_gate_coeff; - CLArithmeticAddition _accum_forget_gate_bias; - CLMeanStdDevNormalizationLayer _mean_std_norm_cell_gate; - CLPixelWiseMultiplication _pixelwise_mul_cell_gate_coeff; - CLArithmeticAddition _accum_cell_gate_bias; - CLMeanStdDevNormalizationLayer _mean_std_norm_output_gate; - CLPixelWiseMultiplication _pixelwise_mul_output_gate_coeff; - CLArithmeticAddition _accum_output_gate_bias; - CLTensor _input_gate_out1; - CLTensor _input_gate_out2; - CLTensor _input_gate_out3; - CLTensor _input_gate_out4; - CLTensor _forget_gate_out1; - CLTensor _forget_gate_out2; - CLTensor _forget_gate_out3; - CLTensor _forget_gate_out4; - CLTensor _forget_gate_out5; - CLTensor _forget_gate_out6; - CLTensor _cell_state_out1; - CLTensor _cell_state_out2; - CLTensor _cell_state_out3; - CLTensor _cell_state_out4; - CLTensor _cell_state_out5; - CLTensor _output1; - CLTensor _output2; - CLTensor _output3; - CLTensor _output4; - CLTensor _cell_state_activation; - CLTensor _output_state1; - CLTensor _ones; - CLTensor _input_layer_norm_out1; - CLTensor _input_layer_norm_out2; - CLTensor _forget_layer_norm_out1; - CLTensor _forget_layer_norm_out2; - CLTensor _cell_layer_norm_out1; - CLTensor _cell_layer_norm_out2; - CLTensor _output_layer_norm_out1; - CLTensor _output_layer_norm_out2; - bool _run_peephole_opt; - bool _run_cifg_opt; - bool _perform_cell_clipping; - bool _has_projection_weights; - bool _perform_projection_clipping; - bool _is_prepared; - bool _is_layer_norm_lstm; + MemoryGroup _memory_group; + CLFullyConnectedLayer _fully_connected_input_gate; + CLArithmeticAddition _accum_input_gate1; + 
CLArithmeticSubtraction _subtract_input_gate; + CLPixelWiseMultiplication _pixelwise_mul_input_gate; + CLActivationLayer _activation_input_gate; + CLFullyConnectedLayer _fully_connected_forget_gate; + CLArithmeticAddition _accum_forget_gate1; + CLPixelWiseMultiplication _pixelwise_mul_forget_gate; + CLActivationLayer _activation_forget_gate; + CLFullyConnectedLayer _fully_connected_cell_state; + CLGEMM _gemm_cell_state1; + std::unique_ptr _transpose_cell_state; + CLArithmeticAddition _accum_cell_state1; + CLArithmeticAddition _accum_cell_state2; + CLPixelWiseMultiplication _pixelwise_mul_cell_state1; + CLActivationLayer _activation_cell_state; + CLActivationLayer _cell_clip; + CLPixelWiseMultiplication _pixelwise_mul_cell_state2; + CLFullyConnectedLayer _fully_connected_output; + CLPixelWiseMultiplication _pixelwise_mul_output_state1; + CLArithmeticAddition _accum_output1; + CLActivationLayer _activation_output; + CLActivationLayer _activation_output_state; + CLPixelWiseMultiplication _pixelwise_mul_output_state2; + CLFullyConnectedLayer _fully_connected_output_state; + CLActivationLayer _projection_clip; + std::unique_ptr _copy_cell_state; + std::unique_ptr _copy_output; + CLConcatenateLayer _concat_scratch_buffer; + CLConcatenateLayer _concat_inputs_forget_gate; + CLConcatenateLayer _concat_weights_forget_gate; + CLConcatenateLayer _concat_weights_input_gate; + CLConcatenateLayer _concat_weights_output; + std::unique_ptr _ones_memset_kernel; + CLMeanStdDevNormalizationLayer _mean_std_norm_input_gate; + CLPixelWiseMultiplication _pixelwise_mul_input_gate_coeff; + CLArithmeticAddition _accum_input_gate_bias; + CLMeanStdDevNormalizationLayer _mean_std_norm_forget_gate; + CLPixelWiseMultiplication _pixelwise_mul_forget_gate_coeff; + CLArithmeticAddition _accum_forget_gate_bias; + CLMeanStdDevNormalizationLayer _mean_std_norm_cell_gate; + CLPixelWiseMultiplication _pixelwise_mul_cell_gate_coeff; + CLArithmeticAddition _accum_cell_gate_bias; + CLMeanStdDevNormalizationLayer _mean_std_norm_output_gate; + CLPixelWiseMultiplication _pixelwise_mul_output_gate_coeff; + CLArithmeticAddition _accum_output_gate_bias; + CLTensor _input_gate_out1; + CLTensor _input_gate_out2; + CLTensor _input_gate_out3; + CLTensor _input_gate_out4; + CLTensor _forget_gate_out1; + CLTensor _forget_gate_out2; + CLTensor _forget_gate_out3; + CLTensor _forget_gate_out4; + CLTensor _forget_gate_out5; + CLTensor _forget_gate_out6; + CLTensor _cell_state_out1; + CLTensor _cell_state_out2; + CLTensor _cell_state_out3; + CLTensor _cell_state_out4; + CLTensor _cell_state_out5; + CLTensor _output1; + CLTensor _output2; + CLTensor _output3; + CLTensor _output4; + CLTensor _cell_state_activation; + CLTensor _output_state1; + CLTensor _ones; + CLTensor _input_layer_norm_out1; + CLTensor _input_layer_norm_out2; + CLTensor _forget_layer_norm_out1; + CLTensor _forget_layer_norm_out2; + CLTensor _cell_layer_norm_out1; + CLTensor _cell_layer_norm_out2; + CLTensor _output_layer_norm_out1; + CLTensor _output_layer_norm_out2; + bool _run_peephole_opt; + bool _run_cifg_opt; + bool _perform_cell_clipping; + bool _has_projection_weights; + bool _perform_projection_clipping; + bool _is_prepared; + bool _is_layer_norm_lstm; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLLSTMLAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h index ba85c6140c..3bbf9f2c30 100644 --- a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h +++ 
b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h @@ -26,10 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h" -#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h" -#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h" -#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -39,7 +35,13 @@ namespace arm_compute { +class CLCompileContext; +class CLCol2ImKernel; +class CLIm2ColKernel; +class CLWeightsReshapeKernel; +class CLLocallyConnectedMatrixMultiplyKernel; class ICLTensor; +class ITensorInfo; /** Basic function to compute the locally connected layer. This function calls the following OpenCL kernels: * @@ -108,16 +110,16 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - CLIm2ColKernel _input_im2col_kernel; - CLWeightsReshapeKernel _weights_reshape_kernel; - CLLocallyConnectedMatrixMultiplyKernel _mm_kernel; - CLCol2ImKernel _output_col2im_kernel; - CLTensor _input_im2col_reshaped; - CLTensor _weights_reshaped; - CLTensor _gemm_output; - bool _is_prepared; - const ICLTensor *_original_weights; + MemoryGroup _memory_group; + std::unique_ptr _input_im2col_kernel; + std::unique_ptr _weights_reshape_kernel; + std::unique_ptr _mm_kernel; + std::unique_ptr _output_col2im_kernel; + CLTensor _input_im2col_reshaped; + CLTensor _weights_reshaped; + CLTensor _gemm_output; + bool _is_prepared; + const ICLTensor *_original_weights; }; } #endif /* ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLMagnitude.h b/arm_compute/runtime/CL/functions/CLMagnitude.h index ad7cc778e5..6ac141641c 100644 --- a/arm_compute/runtime/CL/functions/CLMagnitude.h +++ b/arm_compute/runtime/CL/functions/CLMagnitude.h @@ -29,6 +29,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Basic function to run @ref CLMagnitudePhaseKernel. */ diff --git a/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h b/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h index 5c8548f9e0..693862fb89 100644 --- a/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h +++ b/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h @@ -24,14 +24,19 @@ #ifndef ARM_COMPUTE_CLMAXUNPOOLINGLAYER_H #define ARM_COMPUTE_CLMAXUNPOOLINGLAYER_H +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLMemsetKernel.h" +#include namespace arm_compute { -class ITensor; +class CLCompileContext; +class ICLTensor; +class ITensorInfo; +class CLMaxUnpoolingLayerKernel; +class CLMemsetKernel; +struct PoolingLayerInfo; /** Function to perform MaxUnpooling. This function calls the following OpenCL kernels: * @@ -43,6 +48,12 @@ class CLMaxUnpoolingLayer : public IFunction public: /** Constructor */ CLMaxUnpoolingLayer(); + /** Prevent instances of this class from being copied */ + CLMaxUnpoolingLayer(const CLMaxUnpoolingLayer &) = delete; + /** Prevent instances of this class from being copied */ + CLMaxUnpoolingLayer &operator=(const CLMaxUnpoolingLayer &) = delete; + /** Default destructor */ + ~CLMaxUnpoolingLayer(); /** Set the input and output tensors. * * @note Output shape must be equal to the shape of the original input to pool. 
@@ -88,8 +99,8 @@ public: void run() override; private: - CLMemsetKernel _memset_kernel; - CLMaxUnpoolingLayerKernel _unpooling_layer_kernel; + std::unique_ptr _memset_kernel; + std::unique_ptr _unpooling_layer_kernel; }; } #endif /* ARM_COMPUTE_CLMAXUNPOOLINGLAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLMeanStdDev.h b/arm_compute/runtime/CL/functions/CLMeanStdDev.h index be192a7c11..d9ced1393e 100644 --- a/arm_compute/runtime/CL/functions/CLMeanStdDev.h +++ b/arm_compute/runtime/CL/functions/CLMeanStdDev.h @@ -25,15 +25,20 @@ #define ARM_COMPUTE_CLMEANSTDDEV_H #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" -#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h" #include "arm_compute/runtime/CL/functions/CLReductionOperation.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" +#include + namespace arm_compute { +class CLCompileContext; +class ICLTensor; +class ITensorInfo; +class CLFillBorderKernel; +class CLMeanStdDevKernel; /** Basic function to execute mean and standard deviation by calling @ref CLMeanStdDevKernel */ class CLMeanStdDev : public IFunction { @@ -49,7 +54,7 @@ public: /** Allow instances of this class to be moved */ CLMeanStdDev &operator=(CLMeanStdDev &&) = default; /** Default destructor */ - ~CLMeanStdDev() = default; + ~CLMeanStdDev(); /** Initialise the kernel's inputs and outputs. * * @param[in, out] input Input image. Data types supported: U8/F16/F32. (Written to only for border filling) @@ -83,20 +88,20 @@ private: void run_float(); void run_int(); - MemoryGroup _memory_group; /**< Function's memory group */ - DataType _data_type; /**< Input data type. */ - unsigned int _num_pixels; /**< Number of image's pixels. */ - bool _run_stddev; /**< Flag for knowing if we should run stddev reduction function. */ - CLReductionOperation _reduction_operation_mean; /**< Reduction operation function for computing mean value. */ - CLReductionOperation _reduction_operation_stddev; /**< Reduction operation function for computing standard deviation. */ - CLTensor _reduction_output_mean; /**< Reduction operation output tensor for mean value. */ - CLTensor _reduction_output_stddev; /**< Reduction operation output tensor for standard deviation value. */ - float *_mean; /**< Pointer that holds the mean value. */ - float *_stddev; /**< Pointer that holds the standard deviation value. */ - CLMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */ - CLFillBorderKernel _fill_border_kernel; /**< Kernel that fills the border with zeroes. */ - cl::Buffer _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */ - cl::Buffer _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */ + MemoryGroup _memory_group; /**< Function's memory group */ + DataType _data_type; /**< Input data type. */ + unsigned int _num_pixels; /**< Number of image's pixels. */ + bool _run_stddev; /**< Flag for knowing if we should run stddev reduction function. */ + CLReductionOperation _reduction_operation_mean; /**< Reduction operation function for computing mean value. */ + CLReductionOperation _reduction_operation_stddev; /**< Reduction operation function for computing standard deviation. */ + CLTensor _reduction_output_mean; /**< Reduction operation output tensor for mean value. 
*/ + CLTensor _reduction_output_stddev; /**< Reduction operation output tensor for standard deviation value. */ + float *_mean; /**< Pointer that holds the mean value. */ + float *_stddev; /**< Pointer that holds the standard deviation value. */ + std::unique_ptr _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */ + std::unique_ptr _fill_border_kernel; /**< Kernel that fills the border with zeroes. */ + cl::Buffer _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */ + cl::Buffer _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */ }; } #endif /*ARM_COMPUTE_CLMEANSTDDEV_H */ diff --git a/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h index 1627de1ae8..cfe59eac09 100644 --- a/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h @@ -29,7 +29,9 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to execute mean and standard deviation normalization by calling @ref CLMeanStdDevNormalizationKernel */ class CLMeanStdDevNormalizationLayer : public ICLSimpleFunction diff --git a/arm_compute/runtime/CL/functions/CLMedian3x3.h b/arm_compute/runtime/CL/functions/CLMedian3x3.h index 7f67f958c1..6c0458203e 100644 --- a/arm_compute/runtime/CL/functions/CLMedian3x3.h +++ b/arm_compute/runtime/CL/functions/CLMedian3x3.h @@ -31,6 +31,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Basic function to execute median filter. This function calls the following OpenCL kernels: diff --git a/arm_compute/runtime/CL/functions/CLMinMaxLocation.h b/arm_compute/runtime/CL/functions/CLMinMaxLocation.h index 04926f7bd0..4e3f28b006 100644 --- a/arm_compute/runtime/CL/functions/CLMinMaxLocation.h +++ b/arm_compute/runtime/CL/functions/CLMinMaxLocation.h @@ -24,12 +24,16 @@ #ifndef ARM_COMPUTE_CLMINMAXLOCATION_H #define ARM_COMPUTE_CLMINMAXLOCATION_H -#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h" #include "arm_compute/runtime/CL/CLArray.h" #include "arm_compute/runtime/IFunction.h" +#include + namespace arm_compute { +class CLCompileContext; +class CLMinMaxKernel; +class CLMinMaxLocationKernel; class ICLTensor; using ICLImage = ICLTensor; @@ -51,6 +55,8 @@ public: CLMinMaxLocation(CLMinMaxLocation &&) = default; /** Allow instances of this class to be moved */ CLMinMaxLocation &operator=(CLMinMaxLocation &&) = default; + /** Default destructor */ + ~CLMinMaxLocation(); /** Initialise the kernel's inputs and outputs. * * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size. @@ -87,16 +93,16 @@ public: void run() override; private: - CLMinMaxKernel _min_max_kernel; /**< Kernel that performs min/max */ - CLMinMaxLocationKernel _min_max_loc_kernel; /**< Kernel that counts min/max occurrences and identifies their positions */ - cl::Buffer _min_max_vals; /**< Buffer to collect min, max values */ - cl::Buffer _min_max_count_vals; /**< Buffer to collect min, max values */ - void *_min; /**< Minimum value. */ - void *_max; /**< Maximum value. */ - uint32_t *_min_count; /**< Minimum value occurrences. */ - uint32_t *_max_count; /**< Maximum value occurrences. */ - CLCoordinates2DArray *_min_loc; /**< Minimum value occurrences coordinates. 
*/ - CLCoordinates2DArray *_max_loc; /**< Maximum value occurrences coordinates. */ + std::unique_ptr _min_max_kernel; /**< Kernel that performs min/max */ + std::unique_ptr _min_max_loc_kernel; /**< Kernel that counts min/max occurrences and identifies their positions */ + cl::Buffer _min_max_vals; /**< Buffer to collect min, max values */ + cl::Buffer _min_max_count_vals; /**< Buffer to collect min, max values */ + void *_min; /**< Minimum value. */ + void *_max; /**< Maximum value. */ + uint32_t *_min_count; /**< Minimum value occurrences. */ + uint32_t *_max_count; /**< Maximum value occurrences. */ + CLCoordinates2DArray *_min_loc; /**< Minimum value occurrences coordinates. */ + CLCoordinates2DArray *_max_loc; /**< Maximum value occurrences coordinates. */ }; } #endif /*ARM_COMPUTE_CLMINMAXLOCATION_H */ diff --git a/arm_compute/runtime/CL/functions/CLNonLinearFilter.h b/arm_compute/runtime/CL/functions/CLNonLinearFilter.h index 8b7e350e09..1b466bf662 100644 --- a/arm_compute/runtime/CL/functions/CLNonLinearFilter.h +++ b/arm_compute/runtime/CL/functions/CLNonLinearFilter.h @@ -31,6 +31,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Basic function to execute non linear filter. This function calls the following OpenCL kernels: diff --git a/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h b/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h index 556de1c64c..c767a042ff 100644 --- a/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h +++ b/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h @@ -29,6 +29,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Basic function to execute non-maxima suppression over a 3x3 window. This function calls the following CL kernels: diff --git a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h index a2d46b368f..389b21e5c8 100644 --- a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h @@ -24,18 +24,19 @@ #ifndef ARM_COMPUTE_CLNORMALIZATIONLAYER_H #define ARM_COMPUTE_CLNORMALIZATIONLAYER_H -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" -#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h" +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/Types.h" +#include namespace arm_compute { +class CLCompileContext; +class CLFillBorderKernel; +class CLNormalizationLayerKernel; class ICLTensor; +class ITensorInfo; /** Basic function to compute a normalization layer. 
This function calls the following CL kernels: * @@ -48,6 +49,16 @@ class CLNormalizationLayer : public IFunction public: /** Default constructor */ CLNormalizationLayer(); + /** Prevent instances of this class from being copied */ + CLNormalizationLayer(const CLNormalizationLayer &) = delete; + /** Prevent instances of this class from being copied */ + CLNormalizationLayer &operator=(const CLNormalizationLayer &) = delete; + /** Prevent instances of this class to be moved */ + CLNormalizationLayer(CLNormalizationLayer &&) = delete; + /** Prevent instances of this class to be moved */ + CLNormalizationLayer &operator=(CLNormalizationLayer &&) = delete; + /** Default destructor */ + ~CLNormalizationLayer(); /** Set the input and output tensors. * * @param[in, out] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], @@ -85,8 +96,8 @@ public: void run() override; private: - CLNormalizationLayerKernel _norm_kernel; /**< Normalization layer kernel to run */ - CLFillBorderKernel _border_handler; /**< Kernel to handle borders */ + std::unique_ptr<CLNormalizationLayerKernel> _norm_kernel; /**< Normalization layer kernel to run */ + std::unique_ptr<CLFillBorderKernel> _border_handler; /**< Kernel to handle borders */ }; } #endif /* ARM_COMPUTE_CLNORMALIZATIONLAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h b/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h index cf4a9b6497..de5155c65a 100644 --- a/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h +++ b/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h @@ -31,7 +31,9 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to run @ref CLNormalizePlanarYUVLayerKernel * diff --git a/arm_compute/runtime/CL/functions/CLOpticalFlow.h b/arm_compute/runtime/CL/functions/CLOpticalFlow.h index adce6748c8..0e34374aa5 100644 --- a/arm_compute/runtime/CL/functions/CLOpticalFlow.h +++ b/arm_compute/runtime/CL/functions/CLOpticalFlow.h @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_CLOPTICALFLOW_H #define ARM_COMPUTE_CLOPTICALFLOW_H -#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h" - #include "arm_compute/core/IArray.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLArray.h" @@ -41,7 +39,12 @@ namespace arm_compute { +class CLCompileContext; class CLPyramid; +class CLLKTrackerInitKernel; +class CLLKTrackerStage0Kernel; +class CLLKTrackerStage1Kernel; +class CLLKTrackerFinalizeKernel; /** OpenCL Array of Internal Keypoints */ using CLLKInternalKeypointArray = CLArray<CLLKInternalKeypoint>; @@ -71,6 +74,8 @@ public: CLOpticalFlow(CLOpticalFlow &&) = default; /** Allow instances of this class to be moved */ CLOpticalFlow &operator=(CLOpticalFlow &&) = default; + /** Default destructor */ + ~CLOpticalFlow(); /** Initialise the function input and output * * @param[in] old_pyramid Pointer to the pyramid for the old tensor.
Data types supported U8 @@ -117,22 +122,22 @@ public: void run() override; private: - MemoryGroup _memory_group; - std::vector<CLLKTrackerInitKernel> _tracker_init_kernel; - std::vector<CLLKTrackerStage0Kernel> _tracker_stage0_kernel; - std::vector<CLLKTrackerStage1Kernel> _tracker_stage1_kernel; - CLLKTrackerFinalizeKernel _tracker_finalize_kernel; - std::vector<CLScharr3x3> _func_scharr; - std::vector<CLTensor> _scharr_gx; - std::vector<CLTensor> _scharr_gy; - const ICLKeyPointArray *_old_points; - const ICLKeyPointArray *_new_points_estimates; - ICLKeyPointArray *_new_points; - std::unique_ptr<CLLKInternalKeypointArray> _old_points_internal; - std::unique_ptr<CLLKInternalKeypointArray> _new_points_internal; - std::unique_ptr<CLCoefficientTableArray> _coefficient_table; - std::unique_ptr<CLOldValueArray> _old_values; - size_t _num_levels; + MemoryGroup _memory_group; + std::vector<std::unique_ptr<CLLKTrackerInitKernel>> _tracker_init_kernel; + std::vector<std::unique_ptr<CLLKTrackerStage0Kernel>> _tracker_stage0_kernel; + std::vector<std::unique_ptr<CLLKTrackerStage1Kernel>> _tracker_stage1_kernel; + std::unique_ptr<CLLKTrackerFinalizeKernel> _tracker_finalize_kernel; + std::vector<CLScharr3x3> _func_scharr; + std::vector<CLTensor> _scharr_gx; + std::vector<CLTensor> _scharr_gy; + const ICLKeyPointArray *_old_points; + const ICLKeyPointArray *_new_points_estimates; + ICLKeyPointArray *_new_points; + std::unique_ptr<CLLKInternalKeypointArray> _old_points_internal; + std::unique_ptr<CLLKInternalKeypointArray> _new_points_internal; + std::unique_ptr<CLCoefficientTableArray> _coefficient_table; + std::unique_ptr<CLOldValueArray> _old_values; + size_t _num_levels; }; } #endif /*ARM_COMPUTE_CLOPTICALFLOW_H */ diff --git a/arm_compute/runtime/CL/functions/CLPReluLayer.h b/arm_compute/runtime/CL/functions/CLPReluLayer.h index ffde9ec186..ab32bccc24 100644 --- a/arm_compute/runtime/CL/functions/CLPReluLayer.h +++ b/arm_compute/runtime/CL/functions/CLPReluLayer.h @@ -24,13 +24,14 @@ #ifndef ARM_COMPUTE_CLPRELULAYER_H #define ARM_COMPUTE_CLPRELULAYER_H -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/runtime/CL/ICLOperator.h" #include "arm_compute/runtime/IFunction.h" namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; namespace experimental { diff --git a/arm_compute/runtime/CL/functions/CLPadLayer.h b/arm_compute/runtime/CL/functions/CLPadLayer.h index e3a923f81c..2bbde30fc2 100644 --- a/arm_compute/runtime/CL/functions/CLPadLayer.h +++ b/arm_compute/runtime/CL/functions/CLPadLayer.h @@ -24,13 +24,15 @@ #ifndef ARM_COMPUTE_CLPADLAYER_H #define ARM_COMPUTE_CLPADLAYER_H -#include "arm_compute/core/CL/kernels/CLCopyKernel.h" -#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h" +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/IFunction.h" namespace arm_compute { +class CLCompileContext; +class CLPadLayerKernel; +class CLCopyKernel; class ICLTensor; /** Basic function to pad a tensor.
This function calls the following OpenCL functions/kernels: @@ -51,6 +53,8 @@ public: CLPadLayer &operator=(const CLPadLayer &) = delete; /** Default move assignment operator */ CLPadLayer &operator=(CLPadLayer &&) = default; + /** Default destructor */ + ~CLPadLayer(); /** Initialize the function * @@ -95,9 +99,9 @@ public: private: void configure_reflect_mode(ICLTensor *input, ICLTensor *output); - CLPadLayerKernel _pad_kernel; - CLCopyKernel _copy_kernel; - bool _perform_pad; + std::unique_ptr _pad_kernel; + std::unique_ptr _copy_kernel; + bool _perform_pad; }; } // namespace arm_compute #endif /*ARM_COMPUTE_PADLAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLPermute.h b/arm_compute/runtime/CL/functions/CLPermute.h index abc23eff0c..50e81da7c4 100644 --- a/arm_compute/runtime/CL/functions/CLPermute.h +++ b/arm_compute/runtime/CL/functions/CLPermute.h @@ -31,7 +31,9 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to execute an @ref CLPermuteKernel. */ class CLPermute : public ICLSimpleFunction diff --git a/arm_compute/runtime/CL/functions/CLPhase.h b/arm_compute/runtime/CL/functions/CLPhase.h index 2731a08a52..34b8e72175 100644 --- a/arm_compute/runtime/CL/functions/CLPhase.h +++ b/arm_compute/runtime/CL/functions/CLPhase.h @@ -29,6 +29,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Basic function to execute an @ref CLMagnitudePhaseKernel. */ diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h index 2066012306..6432cd040d 100644 --- a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h +++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h @@ -24,14 +24,16 @@ #ifndef ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H #define ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/runtime/CL/ICLOperator.h" #include "arm_compute/runtime/IFunction.h" namespace arm_compute { // Forward declaration +class CLCompileContext; +class CLFillBorderKernel; class ICLTensor; +class ITensorInfo; namespace experimental { @@ -106,7 +108,7 @@ public: void run(ITensorPack &tensors) override; private: - CLFillBorderKernel _border_handler; + std::unique_ptr _border_handler; }; /** Basic function to run @ref CLComplexPixelWiseMultiplicationKernel. */ @@ -139,7 +141,7 @@ public: void run(ITensorPack &tensors) override; private: - CLFillBorderKernel _border_handler; + std::unique_ptr _border_handler; }; } // namespace experimental diff --git a/arm_compute/runtime/CL/functions/CLPoolingLayer.h b/arm_compute/runtime/CL/functions/CLPoolingLayer.h index 96dacf9322..ef1f426c22 100644 --- a/arm_compute/runtime/CL/functions/CLPoolingLayer.h +++ b/arm_compute/runtime/CL/functions/CLPoolingLayer.h @@ -31,7 +31,9 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to simulate a pooling layer with the specified pooling operation. 
This function calls the following OpenCL kernels: * diff --git a/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h b/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h index 9a78e77307..9129bfd064 100644 --- a/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h +++ b/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h @@ -24,13 +24,16 @@ #ifndef ARM_COMPUTE_CLPRIORBOXLAYER_H #define ARM_COMPUTE_CLPRIORBOXLAYER_H -#include "arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h" +#include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" namespace arm_compute { +class CLCompileContext; +class CLPriorBoxLayerKernel; class ICLTensor; +class ITensorInfo; /** Basic function to run @ref CLPriorBoxLayerKernel. */ class CLPriorBoxLayer : public ICLSimpleFunction diff --git a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h index 6e537680ee..a8f9221b3d 100644 --- a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h +++ b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h @@ -24,9 +24,6 @@ #ifndef ARM_COMPUTE_CLQLSTMLAYER_H #define ARM_COMPUTE_CLQLSTMLAYER_H -#include "arm_compute/core/CL/kernels/CLCopyKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h" -#include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" #include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h" @@ -40,7 +37,12 @@ namespace arm_compute { // Forward declarations +class CLCompileContext; +class CLCopyKernel; class ICLTensor; +class CLGEMMLowpMatrixAReductionKernel; +class CLQLSTMLayerNormalizationKernel; +class ITensorInfo; /** Basic function to run @ref CLQLSTMLayer * @@ -68,6 +70,8 @@ public: CLQLSTMLayer &operator=(const CLQLSTMLayer &) = delete; /** Default move assignment operator */ CLQLSTMLayer &operator=(CLQLSTMLayer &&) = default; + /** Default destructor */ + ~CLQLSTMLayer(); /** Initialize function's tensors. * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED. 
@@ -285,72 +289,72 @@ private: }; // Functions used - CLTranspose _transpose_input_to_forget_weights{}; - CLTranspose _transpose_input_to_cell_weights{}; - CLTranspose _transpose_input_to_output_weights{}; - CLTranspose _transpose_input_to_input_weights{}; - CLTranspose _transpose_recurrent_to_forget_weights{}; - CLTranspose _transpose_recurrent_to_cell_weights{}; - CLTranspose _transpose_recurrent_to_output_weights{}; - CLTranspose _transpose_recurrent_to_input_weights{}; - CLTranspose _transpose_projection_weights{}; - CLGEMMLowpMatrixAReductionKernel _input_to_input_reduction{}; - CLGEMMLowpMatrixAReductionKernel _recurrent_to_input_reduction{}; - CLGEMMLowpMatrixAReductionKernel _input_to_forget_reduction{}; - CLGEMMLowpMatrixAReductionKernel _recurrent_to_forget_reduction{}; - CLGEMMLowpMatrixAReductionKernel _input_to_cell_reduction{}; - CLGEMMLowpMatrixAReductionKernel _recurrent_to_cell_reduction{}; - CLGEMMLowpMatrixAReductionKernel _input_to_output_reduction{}; - CLGEMMLowpMatrixAReductionKernel _recurrent_to_output_reduction{}; - CLGEMMLowpMatrixAReductionKernel _projection_reduction{}; - CLArithmeticAddition _projection_bias_add{}; - CLGEMMLowpMatrixMultiplyCore _mm_input_to_forget{}; - CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{}; - CLPixelWiseMultiplication _pixelwise_mul_cell_to_forget{}; - CLGEMMLowpOutputStage _input_to_forget_outstage{}; - CLGEMMLowpOutputStage _recurrent_to_forget_outstage{}; - CLGEMMLowpOutputStage _cell_to_forget_outstage{}; - CLArithmeticAddition _accumulate_input_recurrent_forget{}; - CLArithmeticAddition _accumulate_cell_forget{}; - CLActivationLayer _forget_gate_sigmoid{}; - CLGEMMLowpMatrixMultiplyCore _mm_input_to_cell{}; - CLGEMMLowpOutputStage _input_to_cell_outstage{}; - CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{}; - CLGEMMLowpOutputStage _recurrent_to_cell_outstage{}; - CLArithmeticAddition _accumulate_input_recurrent_modulation{}; - CLActivationLayer _cell_gate_tanh{}; - CLArithmeticSubtraction _input_gate_sub{}; - CLGEMMLowpMatrixMultiplyCore _mm_input_to_input{}; - CLGEMMLowpOutputStage _input_to_input_outstage{}; - CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{}; - CLGEMMLowpOutputStage _recurrent_to_input_outstage{}; - CLArithmeticAddition _accumulate_input_recurrent_input{}; - CLPixelWiseMultiplication _pixelwise_mul_cell_to_input{}; - CLGEMMLowpOutputStage _cell_to_input_outstage{}; - CLArithmeticAddition _accumulate_cell_input{}; - CLActivationLayer _input_gate_sigmoid{}; - CLPixelWiseMultiplication _pixelwise_mul_forget_cell{}; - CLPixelWiseMultiplication _pixelwise_mul_input_cell{}; - CLArithmeticAddition _add_forget_cell{}; - CLActivationLayer _cell_clip{}; - CLGEMMLowpMatrixMultiplyCore _mm_input_to_output{}; - CLGEMMLowpOutputStage _input_to_output_outstage{}; - CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{}; - CLGEMMLowpOutputStage _recurrent_to_output_outstage{}; - CLArithmeticAddition _accumulate_input_recurrent_output{}; - CLPixelWiseMultiplication _pixelwise_mul_cell_to_output{}; - CLGEMMLowpOutputStage _cell_to_output_outstage{}; - CLArithmeticAddition _accumulate_cell_to_output{}; - CLActivationLayer _output_gate_sigmoid{}; - CLActivationLayer _hidden_tanh{}; - CLPixelWiseMultiplication _pixelwise_mul_hidden{}; - CLGEMMLowpOutputStage _hidden_outstage{}; - CLGEMMLowpMatrixMultiplyCore _mm_projection{}; - CLGEMMLowpOutputStage _projection_outstage{}; - CLArithmeticAddition _accumulate_projection{}; - CLActivationLayer _projection_clip{}; - std::array _layer_norms{ {} }; - CLCopyKernel 
_copy_output{}; + CLTranspose _transpose_input_to_forget_weights{}; + CLTranspose _transpose_input_to_cell_weights{}; + CLTranspose _transpose_input_to_output_weights{}; + CLTranspose _transpose_input_to_input_weights{}; + CLTranspose _transpose_recurrent_to_forget_weights{}; + CLTranspose _transpose_recurrent_to_cell_weights{}; + CLTranspose _transpose_recurrent_to_output_weights{}; + CLTranspose _transpose_recurrent_to_input_weights{}; + CLTranspose _transpose_projection_weights{}; + std::unique_ptr _input_to_input_reduction; + std::unique_ptr _recurrent_to_input_reduction; + std::unique_ptr _input_to_forget_reduction; + std::unique_ptr _recurrent_to_forget_reduction; + std::unique_ptr _input_to_cell_reduction; + std::unique_ptr _recurrent_to_cell_reduction; + std::unique_ptr _input_to_output_reduction; + std::unique_ptr _recurrent_to_output_reduction; + std::unique_ptr _projection_reduction; + CLArithmeticAddition _projection_bias_add{}; + CLGEMMLowpMatrixMultiplyCore _mm_input_to_forget{}; + CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{}; + CLPixelWiseMultiplication _pixelwise_mul_cell_to_forget{}; + CLGEMMLowpOutputStage _input_to_forget_outstage{}; + CLGEMMLowpOutputStage _recurrent_to_forget_outstage{}; + CLGEMMLowpOutputStage _cell_to_forget_outstage{}; + CLArithmeticAddition _accumulate_input_recurrent_forget{}; + CLArithmeticAddition _accumulate_cell_forget{}; + CLActivationLayer _forget_gate_sigmoid{}; + CLGEMMLowpMatrixMultiplyCore _mm_input_to_cell{}; + CLGEMMLowpOutputStage _input_to_cell_outstage{}; + CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{}; + CLGEMMLowpOutputStage _recurrent_to_cell_outstage{}; + CLArithmeticAddition _accumulate_input_recurrent_modulation{}; + CLActivationLayer _cell_gate_tanh{}; + CLArithmeticSubtraction _input_gate_sub{}; + CLGEMMLowpMatrixMultiplyCore _mm_input_to_input{}; + CLGEMMLowpOutputStage _input_to_input_outstage{}; + CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{}; + CLGEMMLowpOutputStage _recurrent_to_input_outstage{}; + CLArithmeticAddition _accumulate_input_recurrent_input{}; + CLPixelWiseMultiplication _pixelwise_mul_cell_to_input{}; + CLGEMMLowpOutputStage _cell_to_input_outstage{}; + CLArithmeticAddition _accumulate_cell_input{}; + CLActivationLayer _input_gate_sigmoid{}; + CLPixelWiseMultiplication _pixelwise_mul_forget_cell{}; + CLPixelWiseMultiplication _pixelwise_mul_input_cell{}; + CLArithmeticAddition _add_forget_cell{}; + CLActivationLayer _cell_clip{}; + CLGEMMLowpMatrixMultiplyCore _mm_input_to_output{}; + CLGEMMLowpOutputStage _input_to_output_outstage{}; + CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{}; + CLGEMMLowpOutputStage _recurrent_to_output_outstage{}; + CLArithmeticAddition _accumulate_input_recurrent_output{}; + CLPixelWiseMultiplication _pixelwise_mul_cell_to_output{}; + CLGEMMLowpOutputStage _cell_to_output_outstage{}; + CLArithmeticAddition _accumulate_cell_to_output{}; + CLActivationLayer _output_gate_sigmoid{}; + CLActivationLayer _hidden_tanh{}; + CLPixelWiseMultiplication _pixelwise_mul_hidden{}; + CLGEMMLowpOutputStage _hidden_outstage{}; + CLGEMMLowpMatrixMultiplyCore _mm_projection{}; + CLGEMMLowpOutputStage _projection_outstage{}; + CLArithmeticAddition _accumulate_projection{}; + CLActivationLayer _projection_clip{}; + std::array, _layer_norm_count> _layer_norms; + std::unique_ptr _copy_output; TensorCopyKernel _projection_bias_copy{}; TensorCopyKernel _projection_output_to_accumulate_copy{}; @@ -402,30 +406,11 @@ private: inline CLQLSTMLayerNormalizationKernel 
&get_layer_norm(LayerNormGate g) { - return _layer_norms[getGateIndex(g)]; + return *_layer_norms[getGateIndex(g)]; } - inline void configure_layer_norm(LayerNormGate g, const ICLTensor *in) - { - ARM_COMPUTE_ERROR_ON(!_has_layer_norm); - - CLTensor *out = &get_layer_norm_output(g); - _memory_group.manage(out); - out->allocator()->init(*(in->info())); - - get_layer_norm(g).configure(in, out, get_layer_norm_weight(g), get_layer_norm_bias(g)); - } - - inline static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias) - { - // Output quantization scale will be different, but ignored here - // since it will be configured at configure() stage. - const TensorInfo out - { - in - }; - return CLQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias); - } + inline void configure_layer_norm(LayerNormGate g, const ICLTensor *in); + inline static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias); // Temporary tensors CLTensor _input_to_forget_weights_transposed{ nullptr }; diff --git a/arm_compute/runtime/CL/functions/CLQuantizationLayer.h b/arm_compute/runtime/CL/functions/CLQuantizationLayer.h index e045adf5fd..a0a27c5cb4 100644 --- a/arm_compute/runtime/CL/functions/CLQuantizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLQuantizationLayer.h @@ -24,11 +24,14 @@ #ifndef ARM_COMPUTE_CLQUANTIZATIONLAYER_H #define ARM_COMPUTE_CLQUANTIZATIONLAYER_H +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to simulate a quantization layer. This function calls the following CL kernels: * diff --git a/arm_compute/runtime/CL/functions/CLRNNLayer.h b/arm_compute/runtime/CL/functions/CLRNNLayer.h index 9d1cb1a724..ff3fb5449b 100644 --- a/arm_compute/runtime/CL/functions/CLRNNLayer.h +++ b/arm_compute/runtime/CL/functions/CLRNNLayer.h @@ -24,15 +24,17 @@ #ifndef ARM_COMPUTE_CLRNN_LAYER_H #define ARM_COMPUTE_CLRNN_LAYER_H -#include "arm_compute/core/CL/kernels/CLCopyKernel.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" #include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h" #include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" #include "arm_compute/runtime/CL/functions/CLGEMM.h" +#include + namespace arm_compute { +class CLCopyKernel; class ICLTensor; /** Basic function to run @ref CLRNNLayer */ @@ -41,6 +43,12 @@ class CLRNNLayer : public IFunction public: /** Default constructor */ CLRNNLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied */ + CLRNNLayer(const CLRNNLayer &) = delete; + /** Prevent instances of this class from being copied */ + CLRNNLayer &operator=(const CLRNNLayer &) = delete; + /** Default destructor */ + ~CLRNNLayer(); /** Initialize the function * * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. 
Data types supported: F16/F32 @@ -85,16 +93,16 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - CLGEMM _gemm_state_f; - CLArithmeticAddition _add_kernel; - CLActivationLayer _activation; - CLFullyConnectedLayer _fully_connected_kernel; - CLCopyKernel _copy_kernel; - CLTensor _fully_connected_out; - CLTensor _gemm_output; - CLTensor _add_output; - bool _is_prepared; + MemoryGroup _memory_group; + CLGEMM _gemm_state_f; + CLArithmeticAddition _add_kernel; + CLActivationLayer _activation; + CLFullyConnectedLayer _fully_connected_kernel; + std::unique_ptr _copy_kernel; + CLTensor _fully_connected_out; + CLTensor _gemm_output; + CLTensor _add_output; + bool _is_prepared; }; } #endif /* ARM_COMPUTE_CLRNN_LAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLROIAlignLayer.h b/arm_compute/runtime/CL/functions/CLROIAlignLayer.h index 2e78f16d6b..b4cd5560ef 100644 --- a/arm_compute/runtime/CL/functions/CLROIAlignLayer.h +++ b/arm_compute/runtime/CL/functions/CLROIAlignLayer.h @@ -25,12 +25,14 @@ #define ARM_COMPUTE_CLROIALIGNLAYER_H #include "arm_compute/core/CL/ICLArray.h" -#include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ROIPoolingLayerInfo; +class ITensorInfo; /** Basic function to run @ref CLROIAlignLayerKernel. * diff --git a/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h b/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h index 30139274be..836575ef68 100644 --- a/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h +++ b/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h @@ -24,14 +24,14 @@ #ifndef ARM_COMPUTE_CLROIPOOLINGLAYER_H #define ARM_COMPUTE_CLROIPOOLINGLAYER_H -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - #include "arm_compute/core/CL/ICLArray.h" -#include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ROIPoolingLayerInfo; /** Basic function to run @ref CLROIPoolingLayerKernel. * diff --git a/arm_compute/runtime/CL/functions/CLRange.h b/arm_compute/runtime/CL/functions/CLRange.h index a86cfb605d..e11e740861 100644 --- a/arm_compute/runtime/CL/functions/CLRange.h +++ b/arm_compute/runtime/CL/functions/CLRange.h @@ -29,7 +29,9 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to run @ref CLRangeKernel * diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h index 5d050d71d6..3fbcee6c21 100644 --- a/arm_compute/runtime/CL/functions/CLReductionOperation.h +++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_CLREDUCTIONOPERATION_H #define ARM_COMPUTE_CLREDUCTIONOPERATION_H -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" -#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLReshapeLayer.h" #include "arm_compute/runtime/IFunction.h" @@ -37,6 +35,9 @@ namespace arm_compute { // Forward declarations +class CLCompileContext; +class CLFillBorderKernel; +class CLReductionOperationKernel; class ICLTensor; /** Perform reduction operation. @@ -49,6 +50,16 @@ public: * @param[in] memory_manager (Optional) Memory manager. 
*/ CLReductionOperation(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Default Destructor */ + ~CLReductionOperation(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLReductionOperation(const CLReductionOperation &) = delete; + /** Default move constructor */ + CLReductionOperation(CLReductionOperation &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLReductionOperation &operator=(const CLReductionOperation &) = delete; + /** Default move assignment operator */ + CLReductionOperation &operator=(CLReductionOperation &&) = default; /** Set the input and output tensors. * @@ -88,15 +99,15 @@ public: private: ICLTensor *configure_intermediate_result_vector(ICLTensor *input, ICLTensor *output); - MemoryGroup _memory_group; - std::vector<CLTensor> _results_vector; - std::vector<CLReductionOperationKernel> _reduction_kernels_vector; - std::vector<CLFillBorderKernel> _border_handlers_vector; - CLReshapeLayer _reshape; - unsigned int _num_of_stages; - unsigned int _reduction_axis; - bool _is_serial; - bool _is_reshape_required; + MemoryGroup _memory_group; + std::vector<CLTensor> _results_vector; + std::vector<std::unique_ptr<CLReductionOperationKernel>> _reduction_kernels_vector; + std::vector<std::unique_ptr<CLFillBorderKernel>> _border_handlers_vector; + CLReshapeLayer _reshape; + unsigned int _num_of_stages; + unsigned int _reduction_axis; + bool _is_serial; + bool _is_reshape_required; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLREDUCTIONOPERATION_H */ \ No newline at end of file diff --git a/arm_compute/runtime/CL/functions/CLRemap.h b/arm_compute/runtime/CL/functions/CLRemap.h index 5b110d58f4..bf5d348b3b 100644 --- a/arm_compute/runtime/CL/functions/CLRemap.h +++ b/arm_compute/runtime/CL/functions/CLRemap.h @@ -31,6 +31,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Basic function to execute remap.
This function calls the following OpenCL kernels: diff --git a/arm_compute/runtime/CL/functions/CLReorgLayer.h b/arm_compute/runtime/CL/functions/CLReorgLayer.h index a7287ce266..0840fd13fd 100644 --- a/arm_compute/runtime/CL/functions/CLReorgLayer.h +++ b/arm_compute/runtime/CL/functions/CLReorgLayer.h @@ -29,7 +29,9 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; class CLReorgLayer : public ICLSimpleFunction { diff --git a/arm_compute/runtime/CL/functions/CLReshapeLayer.h b/arm_compute/runtime/CL/functions/CLReshapeLayer.h index 7fc6c3b864..b4d52ec8cf 100644 --- a/arm_compute/runtime/CL/functions/CLReshapeLayer.h +++ b/arm_compute/runtime/CL/functions/CLReshapeLayer.h @@ -29,7 +29,9 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to run @ref CLReshapeLayerKernel */ class CLReshapeLayer : public IFunction diff --git a/arm_compute/runtime/CL/functions/CLReverse.h b/arm_compute/runtime/CL/functions/CLReverse.h index 6b140920e9..81fa04b1f5 100644 --- a/arm_compute/runtime/CL/functions/CLReverse.h +++ b/arm_compute/runtime/CL/functions/CLReverse.h @@ -24,11 +24,14 @@ #ifndef ARM_COMPUTE_CLREVERSE_H #define ARM_COMPUTE_CLREVERSE_H +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to run @ref CLReverseKernel */ class CLReverse : public ICLSimpleFunction diff --git a/arm_compute/runtime/CL/functions/CLScale.h b/arm_compute/runtime/CL/functions/CLScale.h index d776e83035..360d63ea22 100644 --- a/arm_compute/runtime/CL/functions/CLScale.h +++ b/arm_compute/runtime/CL/functions/CLScale.h @@ -32,7 +32,9 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to run @ref CLScaleKernel */ class CLScale : public ICLSimpleFunction diff --git a/arm_compute/runtime/CL/functions/CLScharr3x3.h b/arm_compute/runtime/CL/functions/CLScharr3x3.h index 3892874f35..19c860f39b 100644 --- a/arm_compute/runtime/CL/functions/CLScharr3x3.h +++ b/arm_compute/runtime/CL/functions/CLScharr3x3.h @@ -31,6 +31,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Basic function to execute scharr 3x3 filter. 
This function calls the following OpenCL kernels: diff --git a/arm_compute/runtime/CL/functions/CLSelect.h b/arm_compute/runtime/CL/functions/CLSelect.h index a1af922303..7fd52312fb 100644 --- a/arm_compute/runtime/CL/functions/CLSelect.h +++ b/arm_compute/runtime/CL/functions/CLSelect.h @@ -24,14 +24,15 @@ #ifndef ARM_COMPUTE_CLSELECT_H #define ARM_COMPUTE_CLSELECT_H -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" namespace arm_compute { // Forward declarations +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to run @ref CLSelect */ class CLSelect : public ICLSimpleFunction diff --git a/arm_compute/runtime/CL/functions/CLSlice.h b/arm_compute/runtime/CL/functions/CLSlice.h index 23c398cb41..f17e77236d 100644 --- a/arm_compute/runtime/CL/functions/CLSlice.h +++ b/arm_compute/runtime/CL/functions/CLSlice.h @@ -31,6 +31,8 @@ namespace arm_compute { // Forward Declarations class ICLTensor; +class CLCompileContext; +class ITensorInfo; namespace experimental { diff --git a/arm_compute/runtime/CL/functions/CLSobel3x3.h b/arm_compute/runtime/CL/functions/CLSobel3x3.h index 25d4ed6895..492900da11 100644 --- a/arm_compute/runtime/CL/functions/CLSobel3x3.h +++ b/arm_compute/runtime/CL/functions/CLSobel3x3.h @@ -31,6 +31,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Basic function to execute sobel 3x3 filter. This function calls the following OpenCL kernels: @@ -42,6 +43,14 @@ class ICLTensor; class CLSobel3x3 : public ICLSimpleFunction { public: + /** Default Constructor */ + CLSobel3x3() = default; + /** Prevent instances of this class from being copied */ + CLSobel3x3(const CLSobel3x3 &) = delete; + /** Prevent instances of this class from being copied */ + CLSobel3x3 &operator=(const CLSobel3x3 &) = delete; + /** Default destructor */ + ~CLSobel3x3(); /** Initialise the function's source, destinations and border mode. * * @note At least one of output_x or output_y must be not NULL. diff --git a/arm_compute/runtime/CL/functions/CLSobel5x5.h b/arm_compute/runtime/CL/functions/CLSobel5x5.h index 1f91c46f7f..a00fdd72b8 100644 --- a/arm_compute/runtime/CL/functions/CLSobel5x5.h +++ b/arm_compute/runtime/CL/functions/CLSobel5x5.h @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_CLSOBEL5X5_H #define ARM_COMPUTE_CLSOBEL5X5_H -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" -#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/IFunction.h" @@ -37,6 +35,10 @@ namespace arm_compute { +class CLCompileContext; +class CLFillBorderKernel; +class CLSobel5x5HorKernel; +class CLSobel5x5VertKernel; class ICLTensor; /** Basic function to execute sobel 5x5 filter. This function calls the following OpenCL kernels: @@ -54,6 +56,12 @@ public: * @param[in] memory_manager (Optional) Memory manager. */ CLSobel5x5(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied */ + CLSobel5x5(const CLSobel5x5 &) = delete; + /** Prevent instances of this class from being copied */ + CLSobel5x5 &operator=(const CLSobel5x5 &) = delete; + /** Default destructor */ + ~CLSobel5x5(); /** Initialise the function's source, destinations and border mode. * * @note At least one of output_x or output_y must be not NULL. 
@@ -82,12 +90,12 @@ public: void run() override; protected: - MemoryGroup _memory_group; /**< Function's memory group */ - CLSobel5x5HorKernel _sobel_hor; /**< Sobel Horizontal 5x5 kernel */ - CLSobel5x5VertKernel _sobel_vert; /**< Sobel Vertical 5x5 kernel */ - CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */ - CLImage _tmp_x; /**< Temporary buffer for Sobel X */ - CLImage _tmp_y; /**< Temporary buffer for Sobel Y */ + MemoryGroup _memory_group; /**< Function's memory group */ + std::unique_ptr _sobel_hor; /**< Sobel Horizontal 5x5 kernel */ + std::unique_ptr _sobel_vert; /**< Sobel Vertical 5x5 kernel */ + std::unique_ptr _border_handler; /**< Kernel to handle image borders */ + CLImage _tmp_x; /**< Temporary buffer for Sobel X */ + CLImage _tmp_y; /**< Temporary buffer for Sobel Y */ }; } #endif /*ARM_COMPUTE_CLSOBEL5X5_H */ diff --git a/arm_compute/runtime/CL/functions/CLSobel7x7.h b/arm_compute/runtime/CL/functions/CLSobel7x7.h index 91daf64c29..01a863b11b 100644 --- a/arm_compute/runtime/CL/functions/CLSobel7x7.h +++ b/arm_compute/runtime/CL/functions/CLSobel7x7.h @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_CLSOBEL7X7_H #define ARM_COMPUTE_CLSOBEL7X7_H -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" -#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/IFunction.h" @@ -37,6 +35,10 @@ namespace arm_compute { +class CLCompileContext; +class CLFillBorderKernel; +class CLSobel7x7HorKernel; +class CLSobel7x7VertKernel; class ICLTensor; /** Basic function to execute sobel 7x7 filter. This function calls the following OpenCL kernels: @@ -54,6 +56,12 @@ public: * @param[in] memory_manager (Optional) Memory manager. */ CLSobel7x7(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied */ + CLSobel7x7(const CLSobel7x7 &) = delete; + /** Prevent instances of this class from being copied */ + CLSobel7x7 &operator=(const CLSobel7x7 &) = delete; + /** Default destructor */ + ~CLSobel7x7(); /** Initialise the function's source, destinations and border mode. * * @note At least one of output_x or output_y must be not NULL. 
@@ -82,12 +90,12 @@ public: void run() override; protected: - MemoryGroup _memory_group; /**< Function's memory group */ - CLSobel7x7HorKernel _sobel_hor; /**< Sobel Horizontal 7x7 kernel */ - CLSobel7x7VertKernel _sobel_vert; /**< Sobel Vertical 7x7 kernel */ - CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */ - CLImage _tmp_x; /**< Temporary buffer for Sobel X */ - CLImage _tmp_y; /**< Temporary buffer for Sobel Y */ + MemoryGroup _memory_group; /**< Function's memory group */ + std::unique_ptr _sobel_hor; /**< Sobel Horizontal 7x7 kernel */ + std::unique_ptr _sobel_vert; /**< Sobel Vertical 7x7 kernel */ + std::unique_ptr _border_handler; /**< Kernel to handle image borders */ + CLImage _tmp_x; /**< Temporary buffer for Sobel X */ + CLImage _tmp_y; /**< Temporary buffer for Sobel Y */ }; } #endif /*ARM_COMPUTE_CLSOBEL7X7_H */ diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h index fd71f3ed4d..ab10a64de4 100644 --- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h +++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_CLSOFTMAXLAYER_H #define ARM_COMPUTE_CLSOFTMAXLAYER_H -#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLPermute.h" #include "arm_compute/runtime/IFunction.h" @@ -35,7 +34,11 @@ namespace arm_compute { +class CLCompileContext; +class CLLogits1DMaxShiftExpSumKernel; +class CLLogits1DNormKernel; class ICLTensor; +class ITensorInfo; /** Basic function to compute a SoftmaxLayer. * @@ -57,6 +60,16 @@ class CLSoftmaxLayerGeneric : public IFunction public: /** Constructor */ CLSoftmaxLayerGeneric(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied */ + CLSoftmaxLayerGeneric(const CLSoftmaxLayerGeneric &) = delete; + /** Prevent instances of this class from being copied */ + CLSoftmaxLayerGeneric &operator=(const CLSoftmaxLayerGeneric &) = delete; + /** Prevent instances of this class to be moved */ + CLSoftmaxLayerGeneric(CLSoftmaxLayerGeneric &&) = delete; + /** Prevent instances of this class to be moved */ + CLSoftmaxLayerGeneric &operator=(CLSoftmaxLayerGeneric &&) = delete; + /** Default destructor */ + ~CLSoftmaxLayerGeneric(); /** Set the input and output tensors. * * @param[in] input Source tensor. 
Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax @@ -92,17 +105,17 @@ public: void run() override; private: - MemoryGroup _memory_group; - CLPermute _permute_input; - CLPermute _permute_output; - CLLogits1DMaxShiftExpSumKernel _max_shift_exp_sum_kernel; - CLLogits1DNormKernel _norm_kernel; - CLTensor _max; - CLTensor _sum; - CLTensor _tmp; - CLTensor _input_permuted; - CLTensor _output_permuted; - bool _needs_permute; + MemoryGroup _memory_group; + CLPermute _permute_input; + CLPermute _permute_output; + std::unique_ptr _max_shift_exp_sum_kernel; + std::unique_ptr _norm_kernel; + CLTensor _max; + CLTensor _sum; + CLTensor _tmp; + CLTensor _input_permuted; + CLTensor _output_permuted; + bool _needs_permute; }; using CLSoftmaxLayer = CLSoftmaxLayerGeneric; diff --git a/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h b/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h index c6f7f11079..1611aa8ed4 100644 --- a/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h +++ b/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h @@ -24,16 +24,19 @@ #ifndef ARM_COMPUTE_CLSPACETOBATCHLAYER_H #define ARM_COMPUTE_CLSPACETOBATCHLAYER_H -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/CL/kernels/CLMemsetKernel.h" -#include "arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" + +#include namespace arm_compute { +class CLCompileContext; +class CLMemsetKernel; +class CLSpaceToBatchLayerKernel; class ICLTensor; +class ITensorInfo; /** Basic function to spatial divide a tensor. This function calls the following OpenCL kernels/functions: * @@ -54,7 +57,7 @@ public: /** Allow instances of this class to be moved */ CLSpaceToBatchLayer &operator=(CLSpaceToBatchLayer &&) = default; /** Default destructor */ - virtual ~CLSpaceToBatchLayer() = default; + ~CLSpaceToBatchLayer(); /** Set the input and output tensors. * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. @@ -121,9 +124,9 @@ public: void run() override; private: - CLSpaceToBatchLayerKernel _space_to_batch_kernel; /**< SpaceToBatch kernel to run */ - CLMemsetKernel _memset_kernel; /**< Memset kernel to run */ - bool _has_padding; /**< Flag to check if the output has padding */ + std::unique_ptr _space_to_batch_kernel; /**< SpaceToBatch kernel to run */ + std::unique_ptr _memset_kernel; /**< Memset kernel to run */ + bool _has_padding; /**< Flag to check if the output has padding */ }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLSPACETOBATCHLAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h b/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h index 24830cf4d3..9e476fe7bd 100644 --- a/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h +++ b/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h @@ -24,14 +24,17 @@ #ifndef ARM_COMPUTE_CLSPACETODEPTHLAYER_H #define ARM_COMPUTE_CLSPACETODEPTHLAYER_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h" -#include "arm_compute/core/Types.h" +#include namespace arm_compute { +class CLCompileContext; +class CLSpaceToDepthLayerKernel; class ICLTensor; +class ITensorInfo; /** Basic function to run @ref CLSpaceToDepthLayerKernel. 
*/ class CLSpaceToDepthLayer : public IFunction @@ -39,6 +42,16 @@ class CLSpaceToDepthLayer : public IFunction public: /** Default constructor */ CLSpaceToDepthLayer(); + /** Prevent instances of this class from being copied */ + CLSpaceToDepthLayer(const CLSpaceToDepthLayer &) = delete; + /** Prevent instances of this class from being copied */ + CLSpaceToDepthLayer &operator=(const CLSpaceToDepthLayer &) = delete; + /** Prevent instances of this class to be moved */ + CLSpaceToDepthLayer(CLSpaceToDepthLayer &&) = delete; + /** Prevent instances of this class to be moved */ + CLSpaceToDepthLayer &operator=(CLSpaceToDepthLayer &&) = delete; + /** Default destructor */ + ~CLSpaceToDepthLayer(); /** Set the input and output tensors. * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. @@ -68,7 +81,7 @@ public: void run() override; private: - CLSpaceToDepthLayerKernel _space_to_depth_kernel; /**< CLSpaceToDepthLayerKernel to run */ + std::unique_ptr _space_to_depth_kernel; /**< CLSpaceToDepthLayerKernel to run */ }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLSPACETODEPTHLAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLStackLayer.h b/arm_compute/runtime/CL/functions/CLStackLayer.h index 95875962c8..3861fd299a 100644 --- a/arm_compute/runtime/CL/functions/CLStackLayer.h +++ b/arm_compute/runtime/CL/functions/CLStackLayer.h @@ -27,14 +27,15 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/CL/kernels/CLStackLayerKernel.h" - #include #include namespace arm_compute { +class CLCompileContext; +class CLStackLayerKernel; class ICLTensor; +class ITensorInfo; /** Basic function to stack tensors along an axis. This function calls the following kernel: * @@ -46,6 +47,16 @@ class CLStackLayer : public IFunction public: /** Default constructor */ CLStackLayer(); + /** Prevent instances of this class from being copied */ + CLStackLayer(const CLStackLayer &) = delete; + /** Prevent instances of this class from being copied */ + CLStackLayer &operator=(const CLStackLayer &) = delete; + /** Prevent instances of this class to be moved */ + CLStackLayer(CLStackLayer &&) = delete; + /** Prevent instances of this class to be moved */ + CLStackLayer &operator=(CLStackLayer &&) = delete; + /** Default destructor */ + ~CLStackLayer(); /** Initialise the kernel's inputs vector and output. 
* * @note Supported input tensor rank: up to 4 @@ -84,9 +95,9 @@ public: void run() override; private: - std::vector<ICLTensor *> _input; - std::vector<CLStackLayerKernel> _stack_kernels; - unsigned int _num_inputs; + std::vector<ICLTensor *> _input; + std::vector<std::unique_ptr<CLStackLayerKernel>> _stack_kernels; + unsigned int _num_inputs; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLSTACKLAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLTableLookup.h b/arm_compute/runtime/CL/functions/CLTableLookup.h index 32d4b7bdf9..ca59309548 100644 --- a/arm_compute/runtime/CL/functions/CLTableLookup.h +++ b/arm_compute/runtime/CL/functions/CLTableLookup.h @@ -28,6 +28,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; class ICLLut; diff --git a/arm_compute/runtime/CL/functions/CLThreshold.h b/arm_compute/runtime/CL/functions/CLThreshold.h index f3af122f0a..2c9213bd01 100644 --- a/arm_compute/runtime/CL/functions/CLThreshold.h +++ b/arm_compute/runtime/CL/functions/CLThreshold.h @@ -33,6 +33,7 @@ namespace arm_compute { // Forward declarations +class CLCompileContext; class ICLTensor; /** Basic function to run @ref CLThresholdKernel */ diff --git a/arm_compute/runtime/CL/functions/CLTile.h b/arm_compute/runtime/CL/functions/CLTile.h index d2f1e9730c..69743693ff 100644 --- a/arm_compute/runtime/CL/functions/CLTile.h +++ b/arm_compute/runtime/CL/functions/CLTile.h @@ -24,13 +24,14 @@ #ifndef ARM_COMPUTE_CLTILE_H #define ARM_COMPUTE_CLTILE_H -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to run @ref CLTileKernel */ class CLTile : public ICLSimpleFunction diff --git a/arm_compute/runtime/CL/functions/CLTranspose.h b/arm_compute/runtime/CL/functions/CLTranspose.h index 9ba7cafce4..2b7a03f23f 100644 --- a/arm_compute/runtime/CL/functions/CLTranspose.h +++ b/arm_compute/runtime/CL/functions/CLTranspose.h @@ -24,11 +24,14 @@ #ifndef ARM_COMPUTE_CLTRANSPOSE_H #define ARM_COMPUTE_CLTRANSPOSE_H +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to transpose a matrix on OpenCL. This function calls the following OpenCL kernel: * diff --git a/arm_compute/runtime/CL/functions/CLUpsampleLayer.h b/arm_compute/runtime/CL/functions/CLUpsampleLayer.h index 07b4c8aecb..88b293069d 100644 --- a/arm_compute/runtime/CL/functions/CLUpsampleLayer.h +++ b/arm_compute/runtime/CL/functions/CLUpsampleLayer.h @@ -26,13 +26,17 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" +#include <memory> + namespace arm_compute { +class CLCompileContext; +class CLUpsampleLayerKernel; class ICLTensor; +class ITensorInfo; /** Basic function to run @ref CLUpsampleLayerKernel */ class CLUpsampleLayer : public IFunction @@ -49,7 +53,7 @@ public: /** Allow instances of this class to be moved */ CLUpsampleLayer &operator=(CLUpsampleLayer &&) = default; /** Default destructor */ - virtual ~CLUpsampleLayer() = default; + ~CLUpsampleLayer(); /** Initialize the function's source, destination, interpolation type and border_mode.
* @@ -86,8 +90,8 @@ public: void run() override; private: - CLUpsampleLayerKernel _upsample; - ICLTensor *_output; + std::unique_ptr _upsample; + ICLTensor *_output; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLUPSAMPLELAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLWarpAffine.h b/arm_compute/runtime/CL/functions/CLWarpAffine.h index eb7c05be84..153e9bfdfc 100644 --- a/arm_compute/runtime/CL/functions/CLWarpAffine.h +++ b/arm_compute/runtime/CL/functions/CLWarpAffine.h @@ -31,6 +31,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Basic function to run @ref CLWarpAffineKernel for AFFINE transformation */ diff --git a/arm_compute/runtime/CL/functions/CLWarpPerspective.h b/arm_compute/runtime/CL/functions/CLWarpPerspective.h index 2a1f78093e..5c8b5425a4 100644 --- a/arm_compute/runtime/CL/functions/CLWarpPerspective.h +++ b/arm_compute/runtime/CL/functions/CLWarpPerspective.h @@ -31,6 +31,7 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; /** Basic function to run @ref CLWarpPerspectiveKernel for PERSPECTIVE transformation */ diff --git a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h index 602f644230..9ced69c1bb 100644 --- a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_CLWINOGRADCONVOLUTIONLAYER_H #define ARM_COMPUTE_CLWINOGRADCONVOLUTIONLAYER_H -#include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h" -#include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/functions/CLGEMM.h" #include "arm_compute/runtime/CL/functions/CLWinogradInputTransform.h" @@ -33,7 +31,11 @@ namespace arm_compute { +class CLCompileContext; +class CLWinogradFilterTransformKernel; +class CLWinogradOutputTransformKernel; class ICLTensor; +class ITensorInfo; /** Basic function to execute Winograd-based convolution on OpenCL. This function calls the following OpenCL functions/kernels: * @@ -56,6 +58,8 @@ public: CLWinogradConvolutionLayer &operator=(const CLWinogradConvolutionLayer &) = delete; /** Default move assignment operator */ CLWinogradConvolutionLayer &operator=(CLWinogradConvolutionLayer &&) = default; + /** Default destructor */ + ~CLWinogradConvolutionLayer(); /** Set the input and output tensors. 
* * @note: This function only works with 3x3,3x1,1x3,5x5,5x1,1x5,7x1 and 1x7 kernels along with unit strides for both NCHW and NHWC data layout @@ -122,16 +126,16 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - CLGEMM _batched_mm; - CLWinogradInputTransform _input_transform; - CLWinogradFilterTransformKernel _filter_transform; - CLWinogradOutputTransformKernel _output_transform; - CLTensor _input0; - CLTensor _input1; - CLTensor _batched_mm_output; - const ICLTensor *_original_weights; - bool _is_prepared; + MemoryGroup _memory_group; + CLGEMM _batched_mm; + CLWinogradInputTransform _input_transform; + std::unique_ptr _filter_transform; + std::unique_ptr _output_transform; + CLTensor _input0; + CLTensor _input1; + CLTensor _batched_mm_output; + const ICLTensor *_original_weights; + bool _is_prepared; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLWINOGRADCONVOLUTIONLAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h b/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h index 351f88012f..8cd809cc1f 100644 --- a/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h +++ b/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h @@ -31,7 +31,9 @@ namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to execute a @ref CLWinogradInputTransformKernel. */ class CLWinogradInputTransform : public ICLSimpleFunction diff --git a/arm_compute/runtime/CL/functions/CLYOLOLayer.h b/arm_compute/runtime/CL/functions/CLYOLOLayer.h index 3e403f44bd..48ee4ea4f7 100644 --- a/arm_compute/runtime/CL/functions/CLYOLOLayer.h +++ b/arm_compute/runtime/CL/functions/CLYOLOLayer.h @@ -24,13 +24,14 @@ #ifndef ARM_COMPUTE_CLYOLOLAYER_H #define ARM_COMPUTE_CLYOLOLAYER_H -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to run @ref CLYOLOLayerKernel that performs a partial activation on the input * diff --git a/arm_compute/runtime/IOperator.h b/arm_compute/runtime/IOperator.h index 0097383115..fd285160e9 100644 --- a/arm_compute/runtime/IOperator.h +++ b/arm_compute/runtime/IOperator.h @@ -24,14 +24,13 @@ #ifndef ARM_COMPUTE_IOPERATOR_H #define ARM_COMPUTE_IOPERATOR_H -#include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/experimental/Types.h" -#include "arm_compute/runtime/IOperator.h" #include "arm_compute/runtime/IRuntimeContext.h" #include "arm_compute/runtime/Types.h" namespace arm_compute { +class ITensorPack; namespace experimental { /** Base class for all async functions */ diff --git a/docs/04_adding_operator.dox b/docs/04_adding_operator.dox index cf2e78368d..13be712549 100644 --- a/docs/04_adding_operator.dox +++ b/docs/04_adding_operator.dox @@ -80,7 +80,7 @@ There are specific interfaces for OpenCL and Neon: @ref ICLKernel, INEKernel (us There are two others implementation of @ref IKernel called @ref ICLSimpleKernel and INESimpleKernel, they are the interface for simple kernels that have just one input tensor and one output tensor. 
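With the kernel headers now private to src/, a runtime function's public header no longer includes them: it forward-declares the kernel type, owns the kernel through a std::unique_ptr and pulls the kernel header in only from its .cpp file. The sketch below illustrates that pattern with a hypothetical CLFooLayer/CLFooKernel pair (placeholder names for illustration only, not files from the library):

@code{.cpp}
// arm_compute/runtime/CL/functions/CLFooLayer.h -- hypothetical public header
#include "arm_compute/runtime/IFunction.h"

#include <memory>

namespace arm_compute
{
class CLFooKernel; // forward declaration only; the kernel header stays under src/core/CL/kernels/
class ICLTensor;

/** Hypothetical function that owns its kernel through a pointer */
class CLFooLayer : public IFunction
{
public:
    CLFooLayer();
    ~CLFooLayer(); // defined in the .cpp, where CLFooKernel is a complete type
    void configure(const ICLTensor *input, ICLTensor *output);
    void run() override;

private:
    std::unique_ptr<CLFooKernel> _kernel;
};
} // namespace arm_compute

// src/runtime/CL/functions/CLFooLayer.cpp -- hypothetical implementation file
// #include "src/core/CL/kernels/CLFooKernel.h"
// CLFooLayer::CLFooLayer() : _kernel(std::make_unique<CLFooKernel>()) {}
// CLFooLayer::~CLFooLayer() = default;
@endcode

Because the kernel type is incomplete in the public header, the destructor has to be declared there and defined out of line in the .cpp where the kernel definition is visible; this is why the patch adds explicit destructors to the affected runtime functions.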
Creating a new kernel implies adding new files: -- arm_compute/core/CL/kernels/CLReshapeLayerKernel.h +- src/core/CL/kernels/CLReshapeLayerKernel.h - src/core/CL/cl_kernels/reshape_layer.cl - src/core/CL/kernels/CLReshapeLayerKernel.cpp - src/core/CL/CLKernelLibrary.cpp @@ -90,16 +90,16 @@ Neon kernel - src/core/NEON/kernels/NEReshapeLayerKernel.cpp We must register the new layer in the respective libraries: -- arm_compute/core/CL/CLKernels.h +- src/core/CL/CLKernels.h - arm_compute/core/NEON/NEKernels.h These files contain the list of all kernels available in the corresponding Compute Library's backend, for example CLKernels: @code{.cpp} ... -#include "arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h" +#include "src/core/CL/kernels/CLMinMaxLayerKernel.h" +#include "src/core/CL/kernels/CLMinMaxLocationKernel.h" ... -#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h" +#include "src/core/CL/kernels/CLReshapeLayerKernel.h" ... @endcode diff --git a/docs/ComputeLibrary.dir b/docs/ComputeLibrary.dir index 93be52afd7..7733e531cd 100644 --- a/docs/ComputeLibrary.dir +++ b/docs/ComputeLibrary.dir @@ -23,18 +23,10 @@ * @brief Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context. */ -/** @file arm_compute/core/CL/CLKernels.h - * @brief Includes all the OpenCL kernels at once - */ - /** @file arm_compute/core/CL/OpenCL.h * @brief Wrapper to configure the Khronos OpenCL C++ header */ -/** @dir arm_compute/core/CL/kernels - * @brief Folder containing all the OpenCL kernels - */ - /** @dir arm_compute/core/CPP * @brief CPP backend core: kernels and utilities. */ @@ -283,7 +275,7 @@ * @brief Folder containing all the configuration files for GEMM */ -/** @dir src/core/CL/cl_kernels +/** @dir src/core/CL/kernels * @brief All the OpenCL kernels */ diff --git a/examples/cl_cache.cpp b/examples/cl_cache.cpp index 37e1c270d7..6de62f7c5d 100644 --- a/examples/cl_cache.cpp +++ b/examples/cl_cache.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,12 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/runtime/CL/CLFunctions.h" - +#include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/Utils.h" +#include "arm_compute/runtime/CL/functions/CLPermute.h" #include "utils/Utils.h" using namespace arm_compute; diff --git a/examples/cl_convolution.cpp b/examples/cl_convolution.cpp index 34b3466f77..bfa53f3379 100644 --- a/examples/cl_convolution.cpp +++ b/examples/cl_convolution.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,9 +25,10 @@ #error "This example needs to be built with -DARM_COMPUTE_CL" #endif /* ARM_COMPUTE_CL */ +#include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLFunctions.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/functions/CLConvolution.h" #include "utils/ImageLoader.h" #include "utils/Utils.h" diff --git a/examples/cl_events.cpp b/examples/cl_events.cpp index f578180869..27c063cbc9 100644 --- a/examples/cl_events.cpp +++ b/examples/cl_events.cpp @@ -26,8 +26,10 @@ #endif /* ARM_COMPUTE_CL */ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLFunctions.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h" +#include "arm_compute/runtime/CL/functions/CLMedian3x3.h" +#include "arm_compute/runtime/CL/functions/CLScale.h" #include "utils/ImageLoader.h" #include "utils/Utils.h" diff --git a/examples/cl_sgemm.cpp b/examples/cl_sgemm.cpp index 7d3b4fe97f..27af228954 100644 --- a/examples/cl_sgemm.cpp +++ b/examples/cl_sgemm.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,9 +26,9 @@ #endif /* ARM_COMPUTE_CL */ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLFunctions.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/CLTuner.h" +#include "arm_compute/runtime/CL/functions/CLGEMM.h" #include "utils/Utils.h" #include diff --git a/examples/gemm_tuner/cl_gemm_native.cpp b/examples/gemm_tuner/cl_gemm_native.cpp index 43035082a4..02f144ea12 100644 --- a/examples/gemm_tuner/cl_gemm_native.cpp +++ b/examples/gemm_tuner/cl_gemm_native.cpp @@ -26,14 +26,13 @@ #endif /* ARM_COMPUTE_CL */ #include "CommonGemmExampleOptions.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLFunctions.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/CLTuner.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h" #include "tests/CL/Helper.h" #include "utils/Utils.h" #include "utils/command_line/CommandLineOptions.h" diff --git a/examples/gemm_tuner/cl_gemm_reshaped.cpp b/examples/gemm_tuner/cl_gemm_reshaped.cpp index 92fa990c87..a4d6203d5c 100644 --- a/examples/gemm_tuner/cl_gemm_reshaped.cpp +++ b/examples/gemm_tuner/cl_gemm_reshaped.cpp @@ -25,17 +25,16 @@ #error "This example needs to be built with -DARM_COMPUTE_CL" #endif /* ARM_COMPUTE_CL */ -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLFunctions.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/CLTuner.h" #include "examples/gemm_tuner/CommonGemmExampleOptions.h" #include "examples/gemm_tuner/GemmTunerHelpers.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" #include "tests/CL/Helper.h" #include 
"utils/Utils.h" #include "utils/command_line/CommandLineOptions.h" diff --git a/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp b/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp index 3a760018e1..cf65d0dd33 100644 --- a/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp +++ b/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp @@ -27,14 +27,13 @@ #include "CommonGemmExampleOptions.h" #include "GemmTunerHelpers.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLFunctions.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/CLTuner.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" #include "tests/CL/Helper.h" #include "utils/Utils.h" #include "utils/command_line/CommandLineOptions.h" diff --git a/examples/neoncl_scale_median_gaussian.cpp b/examples/neoncl_scale_median_gaussian.cpp index df0eb9620f..948aff23bb 100644 --- a/examples/neoncl_scale_median_gaussian.cpp +++ b/examples/neoncl_scale_median_gaussian.cpp @@ -26,8 +26,9 @@ #endif /* ARM_COMPUTE_CL */ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLFunctions.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h" +#include "arm_compute/runtime/CL/functions/CLScale.h" #include "arm_compute/runtime/NEON/NEFunctions.h" #include "utils/ImageLoader.h" #include "utils/Utils.h" diff --git a/src/core/CL/CLKernels.h b/src/core/CL/CLKernels.h new file mode 100644 index 0000000000..282cc96dd8 --- /dev/null +++ b/src/core/CL/CLKernels.h @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLKERNELS_H +#define ARM_COMPUTE_CLKERNELS_H + +/* Header regrouping all the CL kernels */ +#include "src/core/CL/kernels/CLAbsoluteDifferenceKernel.h" +#include "src/core/CL/kernels/CLAccumulateKernel.h" +#include "src/core/CL/kernels/CLActivationLayerKernel.h" +#include "src/core/CL/kernels/CLArgMinMaxLayerKernel.h" +#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h" +#include "src/core/CL/kernels/CLBatchNormalizationLayerKernel.h" +#include "src/core/CL/kernels/CLBatchToSpaceLayerKernel.h" +#include "src/core/CL/kernels/CLBitwiseAndKernel.h" +#include "src/core/CL/kernels/CLBitwiseNotKernel.h" +#include "src/core/CL/kernels/CLBitwiseOrKernel.h" +#include "src/core/CL/kernels/CLBitwiseXorKernel.h" +#include "src/core/CL/kernels/CLBoundingBoxTransformKernel.h" +#include "src/core/CL/kernels/CLBox3x3Kernel.h" +#include "src/core/CL/kernels/CLCannyEdgeKernel.h" +#include "src/core/CL/kernels/CLChannelCombineKernel.h" +#include "src/core/CL/kernels/CLChannelExtractKernel.h" +#include "src/core/CL/kernels/CLChannelShuffleLayerKernel.h" +#include "src/core/CL/kernels/CLCol2ImKernel.h" +#include "src/core/CL/kernels/CLColorConvertKernel.h" +#include "src/core/CL/kernels/CLComparisonKernel.h" +#include "src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h" +#include "src/core/CL/kernels/CLConvolutionKernel.h" +#include "src/core/CL/kernels/CLCopyKernel.h" +#include "src/core/CL/kernels/CLCropKernel.h" +#include "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h" +#include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h" +#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h" +#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" +#include "src/core/CL/kernels/CLDepthToSpaceLayerKernel.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h" +#include "src/core/CL/kernels/CLDequantizationLayerKernel.h" +#include "src/core/CL/kernels/CLDerivativeKernel.h" +#include "src/core/CL/kernels/CLDilateKernel.h" +#include "src/core/CL/kernels/CLDirectConvolutionLayerKernel.h" +#include "src/core/CL/kernels/CLElementWiseUnaryLayerKernel.h" +#include "src/core/CL/kernels/CLElementwiseOperationKernel.h" +#include "src/core/CL/kernels/CLErodeKernel.h" +#include "src/core/CL/kernels/CLFFTDigitReverseKernel.h" +#include "src/core/CL/kernels/CLFFTRadixStageKernel.h" +#include "src/core/CL/kernels/CLFFTScaleKernel.h" +#include "src/core/CL/kernels/CLFastCornersKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLFlattenLayerKernel.h" +#include "src/core/CL/kernels/CLFloorKernel.h" +#include "src/core/CL/kernels/CLFuseBatchNormalizationKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h" 
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" +#include "src/core/CL/kernels/CLGatherKernel.h" +#include "src/core/CL/kernels/CLGaussian3x3Kernel.h" +#include "src/core/CL/kernels/CLGaussian5x5Kernel.h" +#include "src/core/CL/kernels/CLGaussianPyramidKernel.h" +#include "src/core/CL/kernels/CLGenerateProposalsLayerKernel.h" +#include "src/core/CL/kernels/CLHOGDescriptorKernel.h" +#include "src/core/CL/kernels/CLHOGDetectorKernel.h" +#include "src/core/CL/kernels/CLHarrisCornersKernel.h" +#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h" +#include "src/core/CL/kernels/CLHistogramKernel.h" +#include "src/core/CL/kernels/CLIm2ColKernel.h" +#include "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h" +#include "src/core/CL/kernels/CLIntegralImageKernel.h" +#include "src/core/CL/kernels/CLL2NormalizeLayerKernel.h" +#include "src/core/CL/kernels/CLLKTrackerKernel.h" +#include "src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h" +#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h" +#include "src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h" +#include "src/core/CL/kernels/CLMeanStdDevKernel.h" +#include "src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h" +#include "src/core/CL/kernels/CLMedian3x3Kernel.h" +#include "src/core/CL/kernels/CLMemsetKernel.h" +#include "src/core/CL/kernels/CLMinMaxLayerKernel.h" +#include "src/core/CL/kernels/CLMinMaxLocationKernel.h" +#include "src/core/CL/kernels/CLNonLinearFilterKernel.h" +#include "src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h" +#include "src/core/CL/kernels/CLNormalizationLayerKernel.h" +#include "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h" +#include "src/core/CL/kernels/CLPadLayerKernel.h" +#include "src/core/CL/kernels/CLPermuteKernel.h" +#include "src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h" +#include "src/core/CL/kernels/CLPoolingLayerKernel.h" +#include "src/core/CL/kernels/CLPriorBoxLayerKernel.h" +#include "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h" +#include "src/core/CL/kernels/CLQuantizationLayerKernel.h" +#include "src/core/CL/kernels/CLROIAlignLayerKernel.h" +#include "src/core/CL/kernels/CLROIPoolingLayerKernel.h" +#include "src/core/CL/kernels/CLRangeKernel.h" +#include "src/core/CL/kernels/CLReductionOperationKernel.h" +#include "src/core/CL/kernels/CLRemapKernel.h" +#include "src/core/CL/kernels/CLReorgLayerKernel.h" +#include "src/core/CL/kernels/CLReshapeLayerKernel.h" +#include "src/core/CL/kernels/CLReverseKernel.h" +#include "src/core/CL/kernels/CLScaleKernel.h" +#include "src/core/CL/kernels/CLScharr3x3Kernel.h" +#include "src/core/CL/kernels/CLSelectKernel.h" +#include "src/core/CL/kernels/CLSobel3x3Kernel.h" +#include "src/core/CL/kernels/CLSobel5x5Kernel.h" +#include "src/core/CL/kernels/CLSobel7x7Kernel.h" +#include "src/core/CL/kernels/CLSoftmaxLayerKernel.h" +#include "src/core/CL/kernels/CLSpaceToBatchLayerKernel.h" +#include "src/core/CL/kernels/CLSpaceToDepthLayerKernel.h" +#include "src/core/CL/kernels/CLStackLayerKernel.h" +#include 
"src/core/CL/kernels/CLStridedSliceKernel.h" +#include "src/core/CL/kernels/CLTableLookupKernel.h" +#include "src/core/CL/kernels/CLThresholdKernel.h" +#include "src/core/CL/kernels/CLTileKernel.h" +#include "src/core/CL/kernels/CLTransposeKernel.h" +#include "src/core/CL/kernels/CLUpsampleLayerKernel.h" +#include "src/core/CL/kernels/CLWarpAffineKernel.h" +#include "src/core/CL/kernels/CLWarpPerspectiveKernel.h" +#include "src/core/CL/kernels/CLWeightsReshapeKernel.h" +#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" +#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" +#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h" +#include "src/core/CL/kernels/CLWinogradFilterTransformKernel.h" +#include "src/core/CL/kernels/CLWinogradInputTransformKernel.h" +#include "src/core/CL/kernels/CLWinogradOutputTransformKernel.h" +#include "src/core/CL/kernels/CLYOLOLayerKernel.h" +#include "src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h" + +#endif /* ARM_COMPUTE_CLKERNELS_H */ diff --git a/src/core/CL/CLTracePoint.cpp b/src/core/CL/CLTracePoint.cpp index 631cb84878..d603f40c26 100644 --- a/src/core/CL/CLTracePoint.cpp +++ b/src/core/CL/CLTracePoint.cpp @@ -23,6 +23,7 @@ */ #include "arm_compute/core/TracePoint.h" +#include "arm_compute/core/CL/CLTypes.h" #include "arm_compute/core/CL/ICLArray.h" #include "arm_compute/core/CL/ICLDistribution1D.h" #include "arm_compute/core/CL/ICLHOG.h" @@ -30,7 +31,6 @@ #include "arm_compute/core/CL/ICLMultiHOG.h" #include "arm_compute/core/CL/ICLMultiImage.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h" #include "utils/TypePrinter.h" #include diff --git a/src/core/CL/CLValidate.h b/src/core/CL/CLValidate.h index cbbdf2d9d2..7b5294e452 100644 --- a/src/core/CL/CLValidate.h +++ b/src/core/CL/CLValidate.h @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_CL_VALIDATE_H #define ARM_COMPUTE_CL_VALIDATE_H +#include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/Validate.h" namespace arm_compute diff --git a/src/core/CL/ICLKernel.cpp b/src/core/CL/ICLKernel.cpp index f91510b4a7..2b259bf28a 100644 --- a/src/core/CL/ICLKernel.cpp +++ b/src/core/CL/ICLKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/ICLKernel.h" +#include "src/core/CL/ICLKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/CL/ICLKernel.h b/src/core/CL/ICLKernel.h new file mode 100644 index 0000000000..a24cd8c798 --- /dev/null +++ b/src/core/CL/ICLKernel.h @@ -0,0 +1,404 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_ICLKERNEL_H +#define ARM_COMPUTE_ICLKERNEL_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/CLTypes.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/GPUTarget.h" +#include "arm_compute/core/IKernel.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/experimental/Types.h" + +#include + +namespace arm_compute +{ +template +class ICLArray; +class ICLTensor; +class Window; + +/** Common interface for all the OpenCL kernels */ +class ICLKernel : public IKernel +{ +private: + /** Returns the number of arguments enqueued per array object. + * + * @return The number of arguments enqueued per array object. + */ + template + constexpr static unsigned int num_arguments_per_array() + { + return num_arguments_per_tensor(); + } + /** Returns the number of arguments enqueued per tensor object. + * + * @return The number of arguments enqueued per tensor object. + */ + template + constexpr static unsigned int num_arguments_per_tensor() + { + return 2 + 2 * dimension_size; + } + using IKernel::configure; //Prevent children from calling IKernel::configure() directly +protected: + /** Configure the kernel's window and local workgroup size hint. + * + * @param[in] window The maximum window which will be returned by window() + * @param[in] lws_hint (Optional) Local-Workgroup-Size to use. + */ + void configure_internal(const Window &window, cl::NDRange lws_hint = CLKernelLibrary::get().default_ndrange()) + { + _lws_hint = lws_hint; + IKernel::configure(window); + } + +public: + /** Constructor */ + ICLKernel() + : _kernel(nullptr), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0), _lws_hint() + { + } + /** Returns a reference to the OpenCL kernel of this object. + * + * @return A reference to the OpenCL kernel of this object. + */ + cl::Kernel &kernel() + { + return _kernel; + } + /** Add the passed 1D array's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] array Array to set as an argument of the object's kernel. + * @param[in] strides @ref Strides object containing stride of each dimension in bytes. + * @param[in] num_dimensions Number of dimensions of the @p array. + * @param[in] window Window the kernel will be executed on. + */ + template + void add_1D_array_argument(unsigned int &idx, const ICLArray *array, const Strides &strides, unsigned int num_dimensions, const Window &window) + { + add_array_argument(idx, array, strides, num_dimensions, window); + } + /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] tensor Tensor to set as an argument of the object's kernel. 
+ * @param[in] window Window the kernel will be executed on. + */ + void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window) + { + add_tensor_argument<1>(idx, tensor, window); + } + /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx if the condition is true. + * + * @param[in] cond Condition to check + * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] window Window the kernel will be executed on. + */ + void add_1D_tensor_argument_if(bool cond, unsigned int &idx, const ICLTensor *tensor, const Window &window) + { + if(cond) + { + add_1D_tensor_argument(idx, tensor, window); + } + } + /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] window Window the kernel will be executed on. + */ + void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window) + { + add_tensor_argument<2>(idx, tensor, window); + } + /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx if the condition is true. + * + * @param[in] cond Condition to check + * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] window Window the kernel will be executed on. + */ + void add_2D_tensor_argument_if(bool cond, unsigned int &idx, const ICLTensor *tensor, const Window &window) + { + if(cond) + { + add_2D_tensor_argument(idx, tensor, window); + } + } + /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] window Window the kernel will be executed on. + */ + void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window) + { + add_tensor_argument<3>(idx, tensor, window); + } + /** Add the passed 4D tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] window Window the kernel will be executed on. + */ + void add_4D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window) + { + add_tensor_argument<4>(idx, tensor, window); + } + /** Returns the number of arguments enqueued per 1D array object. + * + * @return The number of arguments enqueues per 1D array object. + */ + constexpr static unsigned int num_arguments_per_1D_array() + { + return num_arguments_per_array<1>(); + } + /** Returns the number of arguments enqueued per 1D tensor object. + * + * @return The number of arguments enqueues per 1D tensor object. 
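+ * (For the 1D case this evaluates to 2 + 2 * 1 = 4 kernel arguments; the layout, shown in add_array_argument() below, is the cl_buffer, one stride and one stride-times-window-step per dimension, and the offset of the first element.)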
+ */ + constexpr static unsigned int num_arguments_per_1D_tensor() + { + return num_arguments_per_tensor<1>(); + } + /** Returns the number of arguments enqueued per 2D tensor object. + * + * @return The number of arguments enqueues per 2D tensor object. + */ + constexpr static unsigned int num_arguments_per_2D_tensor() + { + return num_arguments_per_tensor<2>(); + } + /** Returns the number of arguments enqueued per 3D tensor object. + * + * @return The number of arguments enqueues per 3D tensor object. + */ + constexpr static unsigned int num_arguments_per_3D_tensor() + { + return num_arguments_per_tensor<3>(); + } + /** Returns the number of arguments enqueued per 4D tensor object. + * + * @return The number of arguments enqueues per 4D tensor object. + */ + constexpr static unsigned int num_arguments_per_4D_tensor() + { + return num_arguments_per_tensor<4>(); + } + /** Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue. + * + * @note The queue is *not* flushed by this method, and therefore the kernel will not have been executed by the time this method returns. + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + * @param[in,out] queue Command queue on which to enqueue the kernel. + */ + virtual void run(const Window &window, cl::CommandQueue &queue) + { + ARM_COMPUTE_UNUSED(window, queue); + } + /** Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue. + * + * @note The queue is *not* flushed by this method, and therefore the kernel will not have been executed by the time this method returns. + * + * @param[in] tensors A vector containing the tensors to operato on. + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + * @param[in,out] queue Command queue on which to enqueue the kernel. + */ + virtual void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) + { + ARM_COMPUTE_UNUSED(tensors, window, queue); + } + /** Add the passed parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the arguments. Will be incremented by the number of kernel arguments set. + * @param[in] value Value to set as an argument of the object's kernel. + */ + template + void add_argument(unsigned int &idx, T value) + { + _kernel.setArg(idx++, value); + } + + /** Set the Local-Workgroup-Size hint + * + * @note This method should be called after the configuration of the kernel + * + * @param[in] lws_hint Local-Workgroup-Size to use + */ + void set_lws_hint(const cl::NDRange &lws_hint) + { + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); // lws_hint will be overwritten by configure() + _lws_hint = lws_hint; + } + + /** Return the Local-Workgroup-Size hint + * + * @return Current lws hint + */ + cl::NDRange lws_hint() const + { + return _lws_hint; + } + + /** Get the configuration ID + * + * @note The configuration ID can be used by the caller to distinguish different calls of the same OpenCL kernel + * In particular, this method can be used by CLScheduler to keep track of the best LWS for each configuration of the same kernel. 
+ * The configuration ID should be provided only for the kernels potentially affected by the LWS geometry + * + * @note This method should be called after the configuration of the kernel + * + * @return configuration id string + */ + const std::string &config_id() const + { + return _config_id; + } + + /** Set the targeted GPU architecture + * + * @param[in] target The targeted GPU architecture + */ + void set_target(GPUTarget target) + { + _target = target; + } + + /** Set the targeted GPU architecture according to the CL device + * + * @param[in] device A CL device + */ + void set_target(cl::Device &device); + + /** Get the targeted GPU architecture + * + * @return The targeted GPU architecture. + */ + GPUTarget get_target() const + { + return _target; + } + + /** Get the maximum workgroup size for the device the CLKernelLibrary uses. + * + * @return The maximum workgroup size value. + */ + size_t get_max_workgroup_size(); + /** Get the global work size given an execution window + * + * @param[in] window Execution window + * + * @return Global work size of the given execution window + */ + static cl::NDRange gws_from_window(const Window &window); + +private: + /** Add the passed array's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] array Array to set as an argument of the object's kernel. + * @param[in] strides @ref Strides object containing stride of each dimension in bytes. + * @param[in] num_dimensions Number of dimensions of the @p array. + * @param[in] window Window the kernel will be executed on. + */ + template + void add_array_argument(unsigned int &idx, const ICLArray *array, const Strides &strides, unsigned int num_dimensions, const Window &window); + /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] window Window the kernel will be executed on. + */ + template + void add_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window); + +protected: + cl::Kernel _kernel; /**< OpenCL kernel to run */ + GPUTarget _target; /**< The targeted GPU */ + std::string _config_id; /**< Configuration ID */ + size_t _max_workgroup_size; /**< The maximum workgroup size for this kernel */ +private: + cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */ +}; + +/** Add the kernel to the command queue with the given window. + * + * @note Depending on the size of the window, this might translate into several jobs being enqueued. + * + * @note If kernel->kernel() is empty then the function will return without adding anything to the queue. + * + * @param[in,out] queue OpenCL command queue. + * @param[in] kernel Kernel to enqueue + * @param[in] window Window the kernel has to process. + * @param[in] lws_hint (Optional) Local workgroup size requested. Default is based on the device target. + * @param[in] use_dummy_work_items (Optional) Use dummy work items in order to have two dimensional power of two NDRange. 
Default is false + * Note: it is kernel responsibility to check if the work-item is out-of-range + * + * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed. + */ +void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items = false); + +/** Add the passed array's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] array Array to set as an argument of the object's kernel. + * @param[in] strides @ref Strides object containing stride of each dimension in bytes. + * @param[in] num_dimensions Number of dimensions of the @p array. + * @param[in] window Window the kernel will be executed on. + */ +template +void ICLKernel::add_array_argument(unsigned &idx, const ICLArray *array, const Strides &strides, unsigned int num_dimensions, const Window &window) +{ + ARM_COMPUTE_ERROR_ON(array == nullptr); + + // Calculate offset to the start of the window + unsigned int offset_first_element = 0; + + for(unsigned int n = 0; n < num_dimensions; ++n) + { + offset_first_element += window[n].start() * strides[n]; + } + + unsigned int idx_start = idx; + _kernel.setArg(idx++, array->cl_buffer()); + + for(unsigned int dimension = 0; dimension < dimension_size; dimension++) + { + _kernel.setArg(idx++, strides[dimension]); + _kernel.setArg(idx++, strides[dimension] * window[dimension].step()); + } + + _kernel.setArg(idx++, offset_first_element); + + ARM_COMPUTE_ERROR_ON_MSG_VAR(idx_start + num_arguments_per_array() != idx, + "add_%dD_array_argument() is supposed to add exactly %d arguments to the kernel", dimension_size, num_arguments_per_array()); + ARM_COMPUTE_UNUSED(idx_start); +} +} +#endif /*ARM_COMPUTE_ICLKERNEL_H */ diff --git a/src/core/CL/ICLSimple2DKernel.cpp b/src/core/CL/ICLSimple2DKernel.cpp index dfef5822b2..5d8295bdfe 100644 --- a/src/core/CL/ICLSimple2DKernel.cpp +++ b/src/core/CL/ICLSimple2DKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/ICLSimple2DKernel.h" +#include "src/core/CL/ICLSimple2DKernel.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/CL/ICLSimple2DKernel.h b/src/core/CL/ICLSimple2DKernel.h new file mode 100644 index 0000000000..5246492401 --- /dev/null +++ b/src/core/CL/ICLSimple2DKernel.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_ICLSIMPLE2DKERNEL_H +#define ARM_COMPUTE_ICLSIMPLE2DKERNEL_H + +#include "src/core/CL/ICLSimpleKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output. This interface can be used when the work-item processes a 2D tile */ +class ICLSimple2DKernel : public ICLSimpleKernel +{ +public: + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; +} +#endif /*ARM_COMPUTE_ICLSIMPLE2DKERNEL_H */ diff --git a/src/core/CL/ICLSimple3DKernel.cpp b/src/core/CL/ICLSimple3DKernel.cpp index 3d08262b5f..fef1a86125 100644 --- a/src/core/CL/ICLSimple3DKernel.cpp +++ b/src/core/CL/ICLSimple3DKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/ICLSimple3DKernel.h" +#include "src/core/CL/ICLSimple3DKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/CL/ICLSimple3DKernel.h b/src/core/CL/ICLSimple3DKernel.h new file mode 100644 index 0000000000..ff0b274663 --- /dev/null +++ b/src/core/CL/ICLSimple3DKernel.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_ICLSIMPLE3DKERNEL_H +#define ARM_COMPUTE_ICLSIMPLE3DKERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output. + * Both input tensor and output tensor must have at least 3 dimensions. 
+ */ +class ICLSimple3DKernel : public ICLSimple2DKernel +{ +public: + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; +} +#endif /*ARM_COMPUTE_ICLSIMPLE3DKERNEL_H */ diff --git a/src/core/CL/ICLSimpleKernel.cpp b/src/core/CL/ICLSimpleKernel.cpp index 90b5be8069..d67fefdf71 100644 --- a/src/core/CL/ICLSimpleKernel.cpp +++ b/src/core/CL/ICLSimpleKernel.cpp @@ -21,8 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/ICLSimpleKernel.h" - +#include "src/core/CL/ICLSimpleKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/Validate.h" diff --git a/src/core/CL/ICLSimpleKernel.h b/src/core/CL/ICLSimpleKernel.h new file mode 100644 index 0000000000..b35547a217 --- /dev/null +++ b/src/core/CL/ICLSimpleKernel.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_ICLSIMPLEKERNEL_H +#define ARM_COMPUTE_ICLSIMPLEKERNEL_H + +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/Helpers.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output */ +class ICLSimpleKernel : public ICLKernel +{ +public: + /** Constructor. */ + ICLSimpleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + ICLSimpleKernel(const ICLSimpleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + ICLSimpleKernel &operator=(const ICLSimpleKernel &) = delete; + /** Allow instances of this class to be moved */ + ICLSimpleKernel(ICLSimpleKernel &&) = default; + /** Allow instances of this class to be moved */ + ICLSimpleKernel &operator=(ICLSimpleKernel &&) = default; + /** Default destructor */ + ~ICLSimpleKernel() = default; + + /** Configure the kernel + * + * @param[in] input Source tensor. + * @param[out] output Destination tensor. + * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration. + * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. + * @param[in] border_size (Optional) Size of the border. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize()); + +protected: + const ICLTensor *_input; + ICLTensor *_output; +}; +} + +#endif /*ARM_COMPUTE_ICLSIMPLEKERNEL_H */ diff --git a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp index 29745beee7..76b60cb9f8 100644 --- a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp +++ b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp @@ -21,14 +21,14 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h" - #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Validate.h" + +#include "src/core/CL/kernels/CLAbsoluteDifferenceKernel.h" #include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.h b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.h new file mode 100644 index 0000000000..28f28fe44f --- /dev/null +++ b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H +#define ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the absolute difference kernel. + * + * Absolute difference is computed by: + * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f] + */ +class CLAbsoluteDifferenceKernel : public ICLKernel +{ +public: + /** Default constructor. 
*/ + CLAbsoluteDifferenceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLAbsoluteDifferenceKernel(const CLAbsoluteDifferenceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLAbsoluteDifferenceKernel &operator=(const CLAbsoluteDifferenceKernel &) = delete; + /** Allow instances of this class to be moved */ + CLAbsoluteDifferenceKernel(CLAbsoluteDifferenceKernel &&) = default; + /** Allow instances of this class to be moved */ + CLAbsoluteDifferenceKernel &operator=(CLAbsoluteDifferenceKernel &&) = default; + /** Default destructor */ + ~CLAbsoluteDifferenceKernel() = default; + + /** Set the inputs and output images. + * + * @param[in] input1 Source tensor. Data types supported: U8/S16. + * @param[in] input2 Source tensor. Data types supported: U8/S16. + * @param[out] output Destination tensor. Data types supported: U8/S16. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + /** Set the inputs and output images. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Source tensor. Data types supported: U8/S16. + * @param[in] input2 Source tensor. Data types supported: U8/S16. + * @param[out] output Destination tensor. Data types supported: U8/S16. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1. */ + const ICLTensor *_input2; /**< Source tensor 2. */ + ICLTensor *_output; /**< Destination tensor. */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H */ diff --git a/src/core/CL/kernels/CLAccumulateKernel.cpp b/src/core/CL/kernels/CLAccumulateKernel.cpp index f161906646..b0a8eba644 100644 --- a/src/core/CL/kernels/CLAccumulateKernel.cpp +++ b/src/core/CL/kernels/CLAccumulateKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLAccumulateKernel.h" +#include "src/core/CL/kernels/CLAccumulateKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLAccumulateKernel.h b/src/core/CL/kernels/CLAccumulateKernel.h new file mode 100644 index 0000000000..16a715319d --- /dev/null +++ b/src/core/CL/kernels/CLAccumulateKernel.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLACCUMULATEKERNEL_H +#define ARM_COMPUTE_CLACCUMULATEKERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the accumulate kernel. + * + * Accumulation is computed by: + * @f[ accum(x,y) = accum(x,y) + input(x,y) @f] + */ +class CLAccumulateKernel : public ICLSimple2DKernel +{ +public: + /** Set the input and accumulation tensors. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] accum Destination tensor. Data types supported: S16. + */ + void configure(const ICLTensor *input, ICLTensor *accum); + /** Set the input and accumulation tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] accum Destination tensor. Data types supported: S16. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *accum); +}; + +/** Interface for the accumulate weighted kernel. + * + * Weighted accumulation is computed: + * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f] + * + * Where @f$ 0 \le \alpha \le 1 @f$ + * Conceptually, the rounding for this is defined as: + * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f] +*/ +class CLAccumulateWeightedKernel : public ICLSimple2DKernel +{ +public: + /** Set the input and accumulation images, and the scale value. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32. + * @param[in,out] accum Accumulated tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input, float alpha, ICLTensor *accum); + /** Set the input and accumulation images, and the scale value. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32. + * @param[in,out] accum Accumulated tensor. Data types supported: U8. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, float alpha, ICLTensor *accum); +}; + +/** Interface for the accumulate squared kernel. + * + * The accumulation of squares is computed: + * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f] + * + * Where @f$ 0 \le shift \le 15 @f$ +*/ +class CLAccumulateSquaredKernel : public ICLSimple2DKernel +{ +public: + /** Set the input and accumulation tensors and the shift value. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32. + * @param[in,out] accum Accumulated tensor. Data types supported: S16. + */ + void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum); + /** Set the input and accumulation tensors and the shift value. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32. 
+ * @param[in,out] accum Accumulated tensor. Data types supported: S16. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, uint32_t shift, ICLTensor *accum); +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLACCUMULATEKERNEL_H */ diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp index f0e3047796..8ddf8d8f9e 100644 --- a/src/core/CL/kernels/CLActivationLayerKernel.cpp +++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h" +#include "src/core/CL/kernels/CLActivationLayerKernel.h" #include "arm_compute/core/CL/CLCoreRuntimeContext.h" #include "arm_compute/core/CL/CLHelpers.h" diff --git a/src/core/CL/kernels/CLActivationLayerKernel.h b/src/core/CL/kernels/CLActivationLayerKernel.h new file mode 100644 index 0000000000..821418f835 --- /dev/null +++ b/src/core/CL/kernels/CLActivationLayerKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H +#define ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; +/** Interface for the activation layer kernel. */ +class CLActivationLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLActivationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLActivationLayerKernel(const CLActivationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLActivationLayerKernel &operator=(const CLActivationLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLActivationLayerKernel(CLActivationLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLActivationLayerKernel &operator=(CLActivationLayerKernel &&) = default; + /** Default destructor */ + ~CLActivationLayerKernel() = default; + /** Set the input and output tensor. + * + * @note If the output tensor is a nullptr, the activation function will be performed in-place + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. 
In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] act_info Activation layer information. + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo act_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayerKernel + * + * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result + * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. + * @param[in] output Destination tensor info. Data type supported: same as @p input + * @param[in] act_info Activation layer information. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; + +private: + bool _run_in_place; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp index b5a801a97f..0e6fc6599c 100644 --- a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h" +#include "src/core/CL/kernels/CLArgMinMaxLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLArgMinMaxLayerKernel.h b/src/core/CL/kernels/CLArgMinMaxLayerKernel.h new file mode 100644 index 0000000000..929677f905 --- /dev/null +++ b/src/core/CL/kernels/CLArgMinMaxLayerKernel.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H +#define ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the reduction operation kernel + * + * @note The default data type for an uninitialized output tensor is + * signed 32-bit integer (S32). It is the user's responsibility to check + * that the results do not overflow because the indices are computed + * in unsigned 32-bit (U32). + */ +class CLArgMinMaxLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLArgMinMaxLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLArgMinMaxLayerKernel(const CLArgMinMaxLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLArgMinMaxLayerKernel &operator=(const CLArgMinMaxLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLArgMinMaxLayerKernel(CLArgMinMaxLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLArgMinMaxLayerKernel &operator=(CLArgMinMaxLayerKernel &&) = default; + /** Default destructor */ + ~CLArgMinMaxLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. + * @param[in] prev_output Destination tensor of the previous iterations of @ref CLArgMinMaxLayerKernel. Data types supported: U32/S32 + * Has to be nullptr for the first iteration + * @param[out] output Destination tensor. Data types supported: U32/S32 + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 + * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported. + */ + void configure(const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. + * @param[in] prev_output Destination tensor of the previous iterations of @ref CLArgMinMaxLayerKernel. Data types supported: U32/S32 + * Has to be nullptr for the first iteration + * @param[out] output Destination tensor. Data types supported: U32/S32 + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 + * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op); + + /** Static function to check if given info will lead to a valid configuration of @ref CLArgMinMaxLayerKernel. + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. + * @param[in] prev_output Destination tensor info of the previous iterations. Data types supported: U32/S32 + * Has to be nullptr for the first iteration + * @param[in] output Destination tensor info. Data types supported: U32/S32 + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 + * @param[in] op Reduction operation to perform. 
Only ArgMin and ArgMax are supported. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *prev_output, const ITensorInfo *output, unsigned int axis, ReductionOperation op); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_prev_output; + ICLTensor *_output; + unsigned int _reduction_axis; + ReductionOperation _op; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp index 7a8c9ad0fb..7e9424f58b 100644 --- a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h" +#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.h b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.h new file mode 100644 index 0000000000..54a89eb243 --- /dev/null +++ b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H +#define ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the batch concatenate kernel. + * The input tensor will be concatenated into the output tensor. 
+ */ +class CLBatchConcatenateLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLBatchConcatenateLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBatchConcatenateLayerKernel(const CLBatchConcatenateLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBatchConcatenateLayerKernel &operator=(const CLBatchConcatenateLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBatchConcatenateLayerKernel(CLBatchConcatenateLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBatchConcatenateLayerKernel &operator=(CLBatchConcatenateLayerKernel &&) = default; + /** Default destructor */ + ~CLBatchConcatenateLayerKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[in] batch_offset The offset on axis # 3. + * @param[in,out] output Output tensor. Data types supported: Same as @p input. + * + * @note: The output tensor's low two dimensions can't be smaller than the input one's. + * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. + * + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLBatchConcatenateLayerKernel + * + * @param[in] input Input tensor info. Data types supported: All. + * @param[in] batch_offset The offset on axis # 3. + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; + +private: + unsigned int _batch_offset; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H */ diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp index 09b668d6cd..9aeca3bcfe 100644 --- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h" +#include "src/core/CL/kernels/CLBatchNormalizationLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.h new file mode 100644 index 0000000000..743f4a9594 --- /dev/null +++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H +#define ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the BatchNormalization layer kernel. + */ +class CLBatchNormalizationLayerKernel : public ICLKernel +{ +public: + /** Constructor */ + CLBatchNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBatchNormalizationLayerKernel(const CLBatchNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBatchNormalizationLayerKernel &operator=(const CLBatchNormalizationLayerKernel &) = delete; + /** Default Move Constructor. */ + CLBatchNormalizationLayerKernel(CLBatchNormalizationLayerKernel &&) = default; + /** Default move assignment operator */ + CLBatchNormalizationLayerKernel &operator=(CLBatchNormalizationLayerKernel &&) = default; + /** Default destructor */ + ~CLBatchNormalizationLayerKernel() = default; + + /** Set the input and output tensors. + * + * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place + * + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. + * 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input + * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. 
Data types supported: Same as @p input + * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. + */ + void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, const ICLTensor *gamma = nullptr, float epsilon = 0.001f, + ActivationLayerInfo act_info = ActivationLayerInfo()); + /** Set the input and output tensors. + * + * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. + * 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input + * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input + * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, + const ICLTensor *gamma = nullptr, float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref CLBatchNormalizationLayerKernel + * + * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result. + * 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. 
Data types supported: Same as @p input + * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input + * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const ITensorInfo *mean, const ITensorInfo *var, + const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr, + float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_input; + ICLTensor *_output; + const ICLTensor *_mean; + const ICLTensor *_var; + const ICLTensor *_beta; + const ICLTensor *_gamma; + float _epsilon; + bool _run_in_place; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp index e5997fb4d2..da41feb7b8 100644 --- a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h" +#include "src/core/CL/kernels/CLBatchToSpaceLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.h b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.h new file mode 100644 index 0000000000..131a43e59c --- /dev/null +++ b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H +#define ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the batch to space kernel */ +class CLBatchToSpaceLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLBatchToSpaceLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBatchToSpaceLayerKernel(const CLBatchToSpaceLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBatchToSpaceLayerKernel &operator=(const CLBatchToSpaceLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBatchToSpaceLayerKernel(CLBatchToSpaceLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBatchToSpaceLayerKernel &operator=(CLBatchToSpaceLayerKernel &&) = default; + /** Default destructor */ + ~CLBatchToSpaceLayerKernel() = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output); + /** Initialise the kernel's inputs and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output); + /** Initialise the kernel's inputs and output (Static block shape). + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output); + /** Initialise the kernel's inputs and output (Static block shape). + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayerKernel + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 + * @param[in] output Tensor output. 
Data types supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayerKernel (Static block shape). + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. + * @param[in] output Tensor output. Data types supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const int32_t block_shape_x, const int32_t block_shape_y, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Source tensor */ + const ICLTensor *_block_shape; /**< Block shape tensor */ + ICLTensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLBitwiseAndKernel.cpp b/src/core/CL/kernels/CLBitwiseAndKernel.cpp index 53a438dcf6..91a659284a 100644 --- a/src/core/CL/kernels/CLBitwiseAndKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseAndKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h" +#include "src/core/CL/kernels/CLBitwiseAndKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLBitwiseAndKernel.h b/src/core/CL/kernels/CLBitwiseAndKernel.h new file mode 100644 index 0000000000..01018ee09d --- /dev/null +++ b/src/core/CL/kernels/CLBitwiseAndKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLBITWISEANDKERNEL_H +#define ARM_COMPUTE_CLBITWISEANDKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the bitwise AND operation kernel. + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \land input2(x,y) @f] + */ +class CLBitwiseAndKernel : public ICLKernel +{ +public: + /** Default constructor. 
*/ + CLBitwiseAndKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBitwiseAndKernel(const CLBitwiseAndKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBitwiseAndKernel &operator=(const CLBitwiseAndKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBitwiseAndKernel(CLBitwiseAndKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBitwiseAndKernel &operator=(CLBitwiseAndKernel &&) = default; + /** Set the inputs and output images + * + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + /** Set the inputs and output images + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLBITWISEANDKERNEL_H */ diff --git a/src/core/CL/kernels/CLBitwiseNotKernel.cpp b/src/core/CL/kernels/CLBitwiseNotKernel.cpp index 08e4c54957..118bfe8139 100644 --- a/src/core/CL/kernels/CLBitwiseNotKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseNotKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLBitwiseNotKernel.h" +#include "src/core/CL/kernels/CLBitwiseNotKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLBitwiseNotKernel.h b/src/core/CL/kernels/CLBitwiseNotKernel.h new file mode 100644 index 0000000000..bf68bc7ae5 --- /dev/null +++ b/src/core/CL/kernels/CLBitwiseNotKernel.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLBITWISENOTKERNEL_H +#define ARM_COMPUTE_CLBITWISENOTKERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the bitwise NOT operation kernel. + * + * Result is computed by: + * @f[ output(x,y) = \lnot input(x,y) @f] + */ +class CLBitwiseNotKernel : public ICLSimple2DKernel +{ +public: + /** Set the inputs and output images. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Set the inputs and output images. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLBITWISENOTKERNEL_H */ diff --git a/src/core/CL/kernels/CLBitwiseOrKernel.cpp b/src/core/CL/kernels/CLBitwiseOrKernel.cpp index 0e2e5d4f3c..8954d9aa6d 100644 --- a/src/core/CL/kernels/CLBitwiseOrKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseOrKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLBitwiseOrKernel.h" +#include "src/core/CL/kernels/CLBitwiseOrKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLBitwiseOrKernel.h b/src/core/CL/kernels/CLBitwiseOrKernel.h new file mode 100644 index 0000000000..c27d0c27e2 --- /dev/null +++ b/src/core/CL/kernels/CLBitwiseOrKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLBITWISEORKERNEL_H +#define ARM_COMPUTE_CLBITWISEORKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the bitwise OR operation kernel. 
+ * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \lor input2(x,y) @f] + */ +class CLBitwiseOrKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLBitwiseOrKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBitwiseOrKernel(const CLBitwiseOrKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBitwiseOrKernel &operator=(const CLBitwiseOrKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBitwiseOrKernel(CLBitwiseOrKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBitwiseOrKernel &operator=(CLBitwiseOrKernel &&) = default; + /** Set the inputs and output images + * + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + /** Set the inputs and output images + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLBITWISEORKERNEL_H */ diff --git a/src/core/CL/kernels/CLBitwiseXorKernel.cpp b/src/core/CL/kernels/CLBitwiseXorKernel.cpp index 65b17c02bd..69eb38e2e6 100644 --- a/src/core/CL/kernels/CLBitwiseXorKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseXorKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLBitwiseXorKernel.h" +#include "src/core/CL/kernels/CLBitwiseXorKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLBitwiseXorKernel.h b/src/core/CL/kernels/CLBitwiseXorKernel.h new file mode 100644 index 0000000000..b4861ea757 --- /dev/null +++ b/src/core/CL/kernels/CLBitwiseXorKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLBITWISEXORKERNEL_H +#define ARM_COMPUTE_CLBITWISEXORKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the bitwise XOR operation kernel. + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \oplus input2(x,y) @f] + */ +class CLBitwiseXorKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLBitwiseXorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBitwiseXorKernel(const CLBitwiseXorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBitwiseXorKernel &operator=(const CLBitwiseXorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBitwiseXorKernel(CLBitwiseXorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBitwiseXorKernel &operator=(CLBitwiseXorKernel &&) = default; + /** Set the inputs and output images + * + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + /** Set the inputs and output images + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLBITWISEXORKERNEL_H */ diff --git a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp index b8c0d2f2b8..bcfd9b8e5a 100644 --- a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp +++ b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h" +#include "src/core/CL/kernels/CLBoundingBoxTransformKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLBoundingBoxTransformKernel.h b/src/core/CL/kernels/CLBoundingBoxTransformKernel.h new file mode 100644 index 0000000000..08f350e86a --- /dev/null +++ b/src/core/CL/kernels/CLBoundingBoxTransformKernel.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H +#define ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the bounding box kernel */ +class CLBoundingBoxTransformKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLBoundingBoxTransformKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBoundingBoxTransformKernel(const CLBoundingBoxTransformKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBoundingBoxTransformKernel &operator=(const CLBoundingBoxTransformKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBoundingBoxTransformKernel(CLBoundingBoxTransformKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBoundingBoxTransformKernel &operator=(CLBoundingBoxTransformKernel &&) = default; + /** Default destructor */ + ~CLBoundingBoxTransformKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. + * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input + * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. + * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input + * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. + * + * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. + * + */ + void configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. + * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. 
Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input + * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. + * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input + * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. + * + * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. + * + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info); + + /** Static function to check if given info will lead to a valid configuration of @ref CLBoundingBoxTransform + * + * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. + * @param[in] pred_boxes Destination tensor info. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input + * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. + * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input + * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. + * + * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. + * + * @return a Status + */ + static Status validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_boxes; + ICLTensor *_pred_boxes; + const ICLTensor *_deltas; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H */ diff --git a/src/core/CL/kernels/CLBox3x3Kernel.cpp b/src/core/CL/kernels/CLBox3x3Kernel.cpp index 2f6c09df0b..9f493b4fb8 100644 --- a/src/core/CL/kernels/CLBox3x3Kernel.cpp +++ b/src/core/CL/kernels/CLBox3x3Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLBox3x3Kernel.h" +#include "src/core/CL/kernels/CLBox3x3Kernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLBox3x3Kernel.h b/src/core/CL/kernels/CLBox3x3Kernel.h new file mode 100644 index 0000000000..2373c4a928 --- /dev/null +++ b/src/core/CL/kernels/CLBox3x3Kernel.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLBOX3X3KERNEL_H +#define ARM_COMPUTE_CLBOX3X3KERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the box 3x3 filter kernel. + * + */ +class CLBox3x3Kernel : public ICLSimple2DKernel +{ +public: + /**Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /**Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); + + //Inherited methods overriden: + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLBOX3X3KERNEL_H */ diff --git a/src/core/CL/kernels/CLCannyEdgeKernel.cpp b/src/core/CL/kernels/CLCannyEdgeKernel.cpp index c76ec6769e..1fe944c8a2 100644 --- a/src/core/CL/kernels/CLCannyEdgeKernel.cpp +++ b/src/core/CL/kernels/CLCannyEdgeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h" +#include "src/core/CL/kernels/CLCannyEdgeKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLCannyEdgeKernel.h b/src/core/CL/kernels/CLCannyEdgeKernel.h new file mode 100644 index 0000000000..7543822d8d --- /dev/null +++ b/src/core/CL/kernels/CLCannyEdgeKernel.h @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCANNYEDGEKERNEL_H +#define ARM_COMPUTE_CLCANNYEDGEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform Gradient computation. + */ +class CLGradientKernel : public ICLKernel +{ +public: + /** Constructor */ + CLGradientKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGradientKernel(const CLGradientKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGradientKernel &operator=(const CLGradientKernel &) = delete; + /** Initialise the kernel's sources, destinations and border mode. + * + * @note gx, gy and mag must all be the same size (either 16 or 32). + * + * @param[in] gx Source tensor - Gx component. Data types supported: S16/S32. + * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx. + * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy. + * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8. + * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm. + */ + void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type); + /** Initialise the kernel's sources, destinations and border mode. + * + * @note gx, gy and mag must all be the same size (either 16 or 32). + * + * @param[in] compile_context The compile context to be used. + * @param[in] gx Source tensor - Gx component. Data types supported: S16/S32. + * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx. + * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy. + * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8. + * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_gx; /**< Source tensor - Gx component */ + const ICLTensor *_gy; /**< Source tensor - Gy component */ + ICLTensor *_magnitude; /**< Destination tensor - Magnitude */ + ICLTensor *_phase; /**< Destination tensor - Quantized phase */ +}; + +/** OpenCL kernel to perform Non-Maxima suppression for Canny Edge. + * + * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input + * to characterize points as possible edges. The output buffer needs to be cleared before this kernel is executed. + * + * @note Hysteresis is computed in @ref CLEdgeTraceKernel + */ +class CLEdgeNonMaxSuppressionKernel : public ICLKernel +{ +public: + /** Constructor */ + CLEdgeNonMaxSuppressionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLEdgeNonMaxSuppressionKernel(const CLEdgeNonMaxSuppressionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLEdgeNonMaxSuppressionKernel &operator=(const CLEdgeNonMaxSuppressionKernel &) = delete; + /** Initialise the kernel's sources, destination and border mode. + * + * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32. + * @param[in] phase Source tensor - Quantized phase. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U16/U32. + * @param[in] lower_thr Lower threshold. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined); + /** Initialise the kernel's sources, destination and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32. + * @param[in] phase Source tensor - Quantized phase. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U16/U32. + * @param[in] lower_thr Lower threshold. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_magnitude; /**< Source tensor - Magnitude. */ + const ICLTensor *_phase; /**< Source tensor - Quantized phase. */ + ICLTensor *_output; /**< Destination tensor. */ +}; + +/** OpenCL kernel to perform Edge tracing. 
+ */ +class CLEdgeTraceKernel : public ICLKernel +{ +public: + /** Constructor */ + CLEdgeTraceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLEdgeTraceKernel(const CLEdgeTraceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLEdgeTraceKernel &operator=(const CLEdgeTraceKernel &) = delete; + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data types supported: U16/U32. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] upper_thr Upper threshold used for the hysteresis + * @param[in] lower_thr Lower threshold used for the hysteresis + * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32. + * Expected to be initialized to 0 before each run. + * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32 + * Expected to be initialized to 0 before each run. + * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32. + * Expected to be initialized to 0 before each run. + * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8. + * Expected to be initialized to 0 before each run. + */ + void configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, + ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter); + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U16/U32. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] upper_thr Upper threshold used for the hysteresis + * @param[in] lower_thr Lower threshold used for the hysteresis + * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32. + * Expected to be initialized to 0 before each run. + * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32 + * Expected to be initialized to 0 before each run. + * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32. + * Expected to be initialized to 0 before each run. + * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8. + * Expected to be initialized to 0 before each run. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, + ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Source tensor. */ + ICLTensor *_output; /**< Destination tensor. */ + int32_t _lower_thr; /**< Lower threshold used for the hysteresis. */ + int32_t _upper_thr; /**< Upper threshold used for the hysteresis. 
*/ + ICLTensor *_visited; /**< Marks visited elements */ + ICLTensor *_recorded; /**< Marks recorded elements */ + ICLTensor *_l1_stack; /**< L1 hysteresis stack */ + ICLTensor *_l1_stack_counter; /**< L1 hysteresis stack counter */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLCANNYEDGEKERNEL_H */ diff --git a/src/core/CL/kernels/CLChannelCombineKernel.cpp b/src/core/CL/kernels/CLChannelCombineKernel.cpp index d574f352ae..52ba9dd065 100644 --- a/src/core/CL/kernels/CLChannelCombineKernel.cpp +++ b/src/core/CL/kernels/CLChannelCombineKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLChannelCombineKernel.h" +#include "src/core/CL/kernels/CLChannelCombineKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLMultiImage.h" diff --git a/src/core/CL/kernels/CLChannelCombineKernel.h b/src/core/CL/kernels/CLChannelCombineKernel.h new file mode 100644 index 0000000000..f19995aa8e --- /dev/null +++ b/src/core/CL/kernels/CLChannelCombineKernel.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H +#define ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include <array> +#include <cstdint> + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the channel combine kernel */ +class CLChannelCombineKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLChannelCombineKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLChannelCombineKernel(const CLChannelCombineKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLChannelCombineKernel &operator=(const CLChannelCombineKernel &) = delete; + /** Allow instances of this class to be moved */ + CLChannelCombineKernel(CLChannelCombineKernel &&) = default; + /** Allow instances of this class to be moved */ + CLChannelCombineKernel &operator=(CLChannelCombineKernel &&) = default; + /** Default destructor */ + ~CLChannelCombineKernel() = default; + /** Configure function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1.
Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format. + * @param[out] output The single planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422. + */ + void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output); + /** Configure function's inputs and outputs. + * + * @param[in] compile_context The compile context to be used. + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format. + * @param[out] output The single planar output tensor. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output); + /** Configure function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[out] output The multi planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422. + */ + void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output); + /** Configure function's inputs and outputs. + * + * @param[in] compile_context The compile context to be used. + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[out] output The multi planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422. + */ + void configure(const CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + std::array<const ICLTensor *, 4> _planes; + ICLTensor *_output; + ICLMultiImage *_output_multi; + std::array<uint32_t, 3> _x_subsampling; + std::array<uint32_t, 3> _y_subsampling; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H */ diff --git a/src/core/CL/kernels/CLChannelExtractKernel.cpp b/src/core/CL/kernels/CLChannelExtractKernel.cpp index 7911b948ae..cbf504b98b 100644 --- a/src/core/CL/kernels/CLChannelExtractKernel.cpp +++ b/src/core/CL/kernels/CLChannelExtractKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLChannelExtractKernel.h" +#include "src/core/CL/kernels/CLChannelExtractKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLMultiImage.h" diff --git a/src/core/CL/kernels/CLChannelExtractKernel.h b/src/core/CL/kernels/CLChannelExtractKernel.h new file mode 100644 index 0000000000..37abde548c --- /dev/null +++ b/src/core/CL/kernels/CLChannelExtractKernel.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H +#define ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +#include + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the channel extract kernel */ +class CLChannelExtractKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLChannelExtractKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLChannelExtractKernel(const CLChannelExtractKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLChannelExtractKernel &operator=(const CLChannelExtractKernel &) = delete; + /** Allow instances of this class to be moved */ + CLChannelExtractKernel(CLChannelExtractKernel &&) = default; + /** Allow instances of this class to be moved */ + CLChannelExtractKernel &operator=(CLChannelExtractKernel &&) = default; + /** Default destructor */ + ~CLChannelExtractKernel() = default; + /** Set the input and output of the kernel + * + * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 + * @param[in] channel Channel to extract. + * @param[out] output Destination tensor. Must be of U8 format. + */ + void configure(const ICLTensor *input, Channel channel, ICLTensor *output); + /** Set the input and output of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 + * @param[in] channel Channel to extract. + * @param[out] output Destination tensor. Must be of U8 format. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, Channel channel, ICLTensor *output); + /** Set the input and output of the kernel + * + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444 + * @param[in] channel Channel to extract. + * @param[out] output Single-planar 2D destination image. Must be of U8 format. + */ + void configure(const ICLMultiImage *input, Channel channel, ICLImage *output); + /** Set the input and output of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Multi-planar source image. 
Formats supported: NV12/NV21/IYUV/YUV444 + * @param[in] channel Channel to extract. + * @param[out] output Single-planar 2D destination image. Must be of U8 format. + */ + void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, Channel channel, ICLImage *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + uint32_t _num_elems_processed_per_iteration; + uint32_t _subsampling; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H */ diff --git a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp index 301a762850..c969792c3e 100644 --- a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp +++ b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h" +#include "src/core/CL/kernels/CLChannelShuffleLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLChannelShuffleLayerKernel.h b/src/core/CL/kernels/CLChannelShuffleLayerKernel.h new file mode 100644 index 0000000000..31c007f17e --- /dev/null +++ b/src/core/CL/kernels/CLChannelShuffleLayerKernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H +#define ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the channel shuffle kernel */ +class CLChannelShuffleLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLChannelShuffleLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLChannelShuffleLayerKernel(const CLChannelShuffleLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLChannelShuffleLayerKernel &operator=(const CLChannelShuffleLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLChannelShuffleLayerKernel(CLChannelShuffleLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLChannelShuffleLayerKernel &operator=(CLChannelShuffleLayerKernel &&) = default; + /** Default destructor */ + ~CLChannelShuffleLayerKernel() = default; + /** Configure function's inputs and outputs. + * + * @param[in] input Input tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: Same as @p input + * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. + */ + void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_groups); + /** Configure function's inputs and outputs. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: Same as @p input + * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int num_groups); + /** Static function to check if given info will lead to a valid configuration of @ref CLChannelShuffleLayerKernel + * + * @param[in] input Input tensor info. Data types supported: All. + * @param[in] output Output tensor info. Data type supported: Same as @p input + * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp index 3dc007d9e0..44b8471725 100644 --- a/src/core/CL/kernels/CLCol2ImKernel.cpp +++ b/src/core/CL/kernels/CLCol2ImKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h" +#include "src/core/CL/kernels/CLCol2ImKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLCol2ImKernel.h b/src/core/CL/kernels/CLCol2ImKernel.h new file mode 100644 index 0000000000..710e048bca --- /dev/null +++ b/src/core/CL/kernels/CLCol2ImKernel.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCOL2IMKERNEL_H +#define ARM_COMPUTE_CLCOL2IMKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the col2im reshaping kernel. + * + * Rearranges each matrix column into image blocks. It's the inverse operation of @ref CLIm2ColKernel. + * + * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3: + * + * @f[ + * \left( \begin{array}{ccccccccc} + * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccc} + * a0 & a1 & a2 \\ + * a3 & a4 & a5 \\ + * a6 & a7 & a8 \\ + * \end{array} \right) + * @f] + */ +class CLCol2ImKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLCol2ImKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLCol2ImKernel(const CLCol2ImKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLCol2ImKernel &operator=(const CLCol2ImKernel &) = delete; + /** Allow instances of this class to be moved */ + CLCol2ImKernel(CLCol2ImKernel &&) = default; + /** Allow instances of this class to be moved */ + CLCol2ImKernel &operator=(CLCol2ImKernel &&) = default; + /** Default destructor */ + ~CLCol2ImKernel() = default; + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], + * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW + * @param[in] convolved_dims Output convolved dimensions. 
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution + */ + void configure(const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups = 1); + /** Set the input and output of the kernel. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], + * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW + * @param[in] convolved_dims Output convolved dimensions. + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups = 1); + /** Static function to check if given info will lead to a valid configuration of @ref CLCol2ImKernel + * + * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], + * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW + * @param[in] convolved_dims Output convolved dimensions. + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims, unsigned int num_groups = 1); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +public: + const ICLTensor *_input; + ICLTensor *_output; + Size2D _convolved_dims; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLCOL2IMKERNEL_H */ diff --git a/src/core/CL/kernels/CLColorConvertKernel.cpp b/src/core/CL/kernels/CLColorConvertKernel.cpp index 0f82d87348..6c61fec997 100644 --- a/src/core/CL/kernels/CLColorConvertKernel.cpp +++ b/src/core/CL/kernels/CLColorConvertKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h" +#include "src/core/CL/kernels/CLColorConvertKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLMultiImage.h" diff --git a/src/core/CL/kernels/CLColorConvertKernel.h b/src/core/CL/kernels/CLColorConvertKernel.h new file mode 100644 index 0000000000..0f082914cd --- /dev/null +++ b/src/core/CL/kernels/CLColorConvertKernel.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCOLORCONVERTKERNEL_H +#define ARM_COMPUTE_CLCOLORCONVERTKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the color convert kernel. + * + */ +class CLColorConvertKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLColorConvertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLColorConvertKernel(const CLColorConvertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLColorConvertKernel &operator=(const CLColorConvertKernel &) = delete; + /** Allow instances of this class to be moved */ + CLColorConvertKernel(CLColorConvertKernel &&) = default; + /** Allow instances of this class to be moved */ + CLColorConvertKernel &operator=(CLColorConvertKernel &&) = default; + /** Default destructor. */ + ~CLColorConvertKernel() = default; + + /** Set the input and output of the kernel + * + * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 + * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), + * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/), + * U8 (if the formats of @p input is RGB888) + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Set the input and output of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 + * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), + * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/), + * U8 (if the formats of @p input is RGB888) + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); + /** Set the input and output of the kernel + * + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV + * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888 + */ + void configure(const ICLMultiImage *input, ICLImage *output); + /** Set the input and output of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV + * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888 + */ + void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLImage *output); + /** Set the input and output of the kernel + * + * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 + * @param[out] output Multi-planar destination image. 
Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888) + */ + void configure(const ICLImage *input, ICLMultiImage *output); + /** Set the input and output of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 + * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888) + */ + void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLMultiImage *output); + /** Set the input and output of the kernel + * + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV + * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) + */ + void configure(const ICLMultiImage *input, ICLMultiImage *output); + /** Set the input and output of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV + * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) + */ + void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLMultiImage *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /*pointer to single planar tensor input */ + ICLTensor *_output; /*pointer to single planar tensor output */ + const ICLMultiImage *_multi_input; /*pointer to multi-planar input */ + ICLMultiImage *_multi_output; /*pointer to multi-planar output */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLCOLORCONVERTKERNEL_H */ diff --git a/src/core/CL/kernels/CLComparisonKernel.cpp b/src/core/CL/kernels/CLComparisonKernel.cpp index 2b72946f49..e2aee36bd8 100644 --- a/src/core/CL/kernels/CLComparisonKernel.cpp +++ b/src/core/CL/kernels/CLComparisonKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLComparisonKernel.h" +#include "src/core/CL/kernels/CLComparisonKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLComparisonKernel.h b/src/core/CL/kernels/CLComparisonKernel.h new file mode 100644 index 0000000000..0b94190183 --- /dev/null +++ b/src/core/CL/kernels/CLComparisonKernel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCOMPARISONKERNEL_H +#define ARM_COMPUTE_CLCOMPARISONKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ICLTensor; + +/** Interface for the comparison kernel. */ +class CLComparisonKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLComparisonKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLComparisonKernel(const CLComparisonKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLComparisonKernel &operator=(const CLComparisonKernel &) = delete; + /** Allow instances of this class to be moved */ + CLComparisonKernel(CLComparisonKernel &&) = default; + /** Allow instances of this class to be moved */ + CLComparisonKernel &operator=(CLComparisonKernel &&) = default; + /** Default destructor */ + ~CLComparisonKernel() = default; + /** Set the inputs and output tensors + * + * @param[in] input1 Source tensor. Data types supported: All. + * @param[in] input2 Source tensor. Data types supported: Same as @p input1. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] operation Comparison operation to use. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation); + /** Set the inputs and output tensors + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Source tensor. Data types supported: All. + * @param[in] input2 Source tensor. Data types supported: Same as @p input1. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] operation Comparison operation to use. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation); + /** Static function to check if given info will lead to a valid configuration of @ref CLComparisonKernel + * + * @param[in] input1 Source tensor. Data types supported: All. + * @param[in] input2 Source tensor. Data types supported: Same as @p input1. + * @param[in] output Destination tensor. Data types supported: U8. + * @param[in] operation Comparison operation to use. 
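+ * + * A typical call sequence is sketched below; the tensor objects are assumed to be allocated CLTensor instances: + * @code + * ARM_COMPUTE_ERROR_THROW_ON(CLComparisonKernel::validate(input1.info(), input2.info(), output.info(), ComparisonOperation::Greater)); + * CLComparisonKernel comparison; + * comparison.configure(&input1, &input2, &output, ComparisonOperation::Greater); + * @endcode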
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation operation); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLCOMPARISONKERNEL_H */ diff --git a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp index c7888c9c76..dcf4e6662e 100644 --- a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp +++ b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h" +#include "src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h new file mode 100644 index 0000000000..d1da793df2 --- /dev/null +++ b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H +#define ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface to convert the 2D Fully Connected weights from NCHW to NHWC or vice versa. + * + * @note This function can be applied to the 2D weights used by a Fully Connected layer if: + * - It follows a Convolution layer + * - The data layout used by the network does not match the one the model has been trained in. 
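+ * + * For example (a sketch; the weight tensors and the original 4D input shape are assumed to be known to the caller): + * @code + * CLConvertFullyConnectedWeightsKernel convert; + * convert.configure(&trained_weights, &converted_weights, original_input_shape, DataLayout::NCHW); // weights were trained in NCHW + * @endcode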
+ * + * @note This function assumes the weights are already reshaped (transposed) + */ +class CLConvertFullyConnectedWeightsKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLConvertFullyConnectedWeightsKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLConvertFullyConnectedWeightsKernel(const CLConvertFullyConnectedWeightsKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLConvertFullyConnectedWeightsKernel &operator=(const CLConvertFullyConnectedWeightsKernel &) = delete; + /** Allow instances of this class to be moved */ + CLConvertFullyConnectedWeightsKernel(CLConvertFullyConnectedWeightsKernel &&) = default; + /** Allow instances of this class to be moved */ + CLConvertFullyConnectedWeightsKernel &operator=(CLConvertFullyConnectedWeightsKernel &&) = default; + /** Default destructor */ + ~CLConvertFullyConnectedWeightsKernel() = default; + /** Set the input and output tensor. + * + * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. + * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. + * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). + * @param[in] data_layout The data layout the weights have been trained in. + */ + void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout); + /** Set the input and output tensor. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. + * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. + * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). + * @param[in] data_layout The data layout the weights have been trained in. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout); + /** Static function to check if given info will lead to a valid configuration of @ref CLConvertFullyConnectedWeightsKernel + * + * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All. + * @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input. + * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). + * @param[in] data_layout The data layout the weights have been trained in. + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H */ diff --git a/src/core/CL/kernels/CLConvolutionKernel.cpp b/src/core/CL/kernels/CLConvolutionKernel.cpp index 48b185f78d..21f1047cc6 100644 --- a/src/core/CL/kernels/CLConvolutionKernel.cpp +++ b/src/core/CL/kernels/CLConvolutionKernel.cpp @@ -21,11 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h" +#include "src/core/CL/kernels/CLConvolutionKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Error.h" @@ -33,6 +32,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" +#include "src/core/CL/ICLKernel.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLConvolutionKernel.h b/src/core/CL/kernels/CLConvolutionKernel.h new file mode 100644 index 0000000000..33e73caf11 --- /dev/null +++ b/src/core/CL/kernels/CLConvolutionKernel.h @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCONVOLUTIONKERNEL_H +#define ARM_COMPUTE_CLCONVOLUTIONKERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/****************************************************************************************\ + * Square Convolution * +\****************************************************************************************/ + +/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9). + * The client can supply a convolution matrix \f$ C_{m,n} \f$. + * @f{eqnarray}{ + * k_0 &=& \frac{m}{2} \\ + * l_0 &=& \frac{n}{2} \\ + * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l} + * @f} + * + * @note The above equation for this function is similar to the default OpenCV Filter2D function, + * which actually computes a correlation and not a convolution. + * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically. + */ +template +class CLConvolutionKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] scale Scale of the convolution matrix. 
If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; + +/** Interface for the kernel which applies a 3x3 convolution to a tensor. */ +using CLConvolution3x3Kernel = CLConvolutionKernel<3>; +/** Interface for the kernel which applies a 5x5 convolution to a tensor. */ +using CLConvolution5x5Kernel = CLConvolutionKernel<5>; +/** Interface for the kernel which applies a 7x7 convolution to a tensor. */ +using CLConvolution7x7Kernel = CLConvolutionKernel<7>; +/** Interface for the kernel which applies a 9x9 convolution to a tensor. */ +using CLConvolution9x9Kernel = CLConvolutionKernel<9>; + +/****************************************************************************************\ + * Separable Square Convolution * +\****************************************************************************************/ + +/** Kernel for the Horizontal pass of a Separable Convolution. Currently supports 5x5, 7x7, 9x9 */ +template <unsigned int matrix_size> +class CLSeparableConvolutionHorKernel : public ICLSimple2DKernel +{ +public: + /** Default Constructor */ + CLSeparableConvolutionHorKernel(); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U16/S16/S32. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; + +private: + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel which applies a horizontal pass of 5x5 convolution to a tensor.
*/ +using CLSeparableConvolution5x5HorKernel = CLSeparableConvolutionHorKernel<5>; +/** Interface for the kernel which applies a horizontal pass of 7x7 convolution to a tensor. */ +using CLSeparableConvolution7x7HorKernel = CLSeparableConvolutionHorKernel<7>; +/** Interface for the kernel which applies a horizontal pass of 9x9 convolution to a tensor. */ +using CLSeparableConvolution9x9HorKernel = CLSeparableConvolutionHorKernel<9>; + +/** Kernel for the Vertical pass of a Separable Convolution. Currently supports 5x5, 7x7, 9x9 */ +template <unsigned int matrix_size> +class CLSeparableConvolutionVertKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data types supported: U16/S16/S32. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] scale Scale of the convolution matrix. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + * @param[in] data_type Data type to use for intermediate result. @sa data_type_for_convolution + */ + void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U16/S16/S32. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] scale Scale of the convolution matrix. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + * @param[in] data_type Data type to use for intermediate result. @sa data_type_for_convolution + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; + +/** Interface for the kernel which applies a vertical pass of 5x5 convolution to a tensor. */ +using CLSeparableConvolution5x5VertKernel = CLSeparableConvolutionVertKernel<5>; +/** Interface for the kernel which applies a vertical pass of 7x7 convolution to a tensor. */ +using CLSeparableConvolution7x7VertKernel = CLSeparableConvolutionVertKernel<7>; +/** Interface for the kernel which applies a vertical pass of 9x9 convolution to a tensor. */ +using CLSeparableConvolution9x9VertKernel = CLSeparableConvolutionVertKernel<9>; + +/****************************************************************************************\ + * Rectangle Convolution * +\****************************************************************************************/ + +/** Kernel for the running convolution on a rectangle matrix. + * + * @note Supports combinations of 3,5,7 and 9.
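+ * + * A minimal configuration sketch (the U8 input/output tensors are assumed to be allocated by the caller; the coefficients here form a 3x5 box filter): + * @code + * const int16_t coeffs[15] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; + * CLConvolutionRectangleKernel conv; + * conv.configure(&input, &output, coeffs, 5, 3, 0, false); // width 5, height 3, scale 0 (use sum of coefficients), border not undefined + * @endcode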
+ */ +class CLConvolutionRectangleKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLConvolutionRectangleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLConvolutionRectangleKernel(const CLConvolutionRectangleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLConvolutionRectangleKernel &operator=(const CLConvolutionRectangleKernel &) = delete; + /** Allow instances of this class to be moved */ + CLConvolutionRectangleKernel(CLConvolutionRectangleKernel &&) = default; + /** Allow instances of this class to be moved */ + CLConvolutionRectangleKernel &operator=(CLConvolutionRectangleKernel &&) = default; + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] width Width of convolution matrix (Number of columns) + * @param[in] height Height of convolution matrix (Number of rows) + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] width Width of convolution matrix (Number of columns) + * @param[in] height Height of convolution matrix (Number of rows) + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + BorderSize _border_size; + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLCONVOLUTIONKERNEL_H */ diff --git a/src/core/CL/kernels/CLCopyKernel.cpp b/src/core/CL/kernels/CLCopyKernel.cpp index 184b80caa8..ca38b65df4 100644 --- a/src/core/CL/kernels/CLCopyKernel.cpp +++ b/src/core/CL/kernels/CLCopyKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLCopyKernel.h" +#include "src/core/CL/kernels/CLCopyKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLCopyKernel.h b/src/core/CL/kernels/CLCopyKernel.h new file mode 100644 index 0000000000..9a20b88884 --- /dev/null +++ b/src/core/CL/kernels/CLCopyKernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCOPYKERNEL_H +#define ARM_COMPUTE_CLCOPYKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform a copy between two tensors */ +class CLCopyKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLCopyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLCopyKernel(const CLCopyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLCopyKernel &operator=(const CLCopyKernel &) = delete; + /** Allow instances of this class to be moved */ + CLCopyKernel(CLCopyKernel &&) = default; + /** Allow instances of this class to be moved */ + CLCopyKernel &operator=(CLCopyKernel &&) = default; + /** Initialize the kernel's input, output. + * + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. + */ + void configure(const ICLTensor *input, ICLTensor *output, Window *output_window = nullptr); + /** Initialize the kernel's input, output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, Window *output_window = nullptr); + /** Static function to check if given info will lead to a valid configuration of @ref CLCopyKernel + * + * @param[in] input Source tensor info. Data types supported: All. 
+ * @param[in] output Destination tensor info. Data types supported: same as @p input. + * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, Window *output_window = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + Window _output_window; + bool _has_output_window; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLCOPYKERNEL_H */ diff --git a/src/core/CL/kernels/CLCropKernel.cpp b/src/core/CL/kernels/CLCropKernel.cpp index 2c99d46929..9cf15ff93b 100644 --- a/src/core/CL/kernels/CLCropKernel.cpp +++ b/src/core/CL/kernels/CLCropKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLCropKernel.h" +#include "src/core/CL/kernels/CLCropKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLCropKernel.h b/src/core/CL/kernels/CLCropKernel.h new file mode 100644 index 0000000000..cbfada58ab --- /dev/null +++ b/src/core/CL/kernels/CLCropKernel.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCROPKERNEL_H +#define ARM_COMPUTE_CLCROPKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to crop a region of an input tensor into an output tensor */ +class CLCropKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLCropKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLCropKernel(const CLCropKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLCropKernel &operator=(const CLCropKernel &) = delete; + /** Allow instances of this class to be moved */ + CLCropKernel(CLCropKernel &&) = default; + /** Allow instances of this class to be moved */ + CLCropKernel &operator=(CLCropKernel &&) = default; + /** Configure kernel + * + * @note Supported tensor rank: up to 4 + * + * @param[in] input Source tensor. Data type supported: All.
Data layouts supported: NHWC. + * @param[out] output Destination tensor. Data type supported: F32 + * @param[in] start Coordinates of where to start cropping the image. + * @param[in] end Coordinates of where to end cropping the image. + * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input. + * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. + * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr. + */ + void configure(const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, Window *output_window = nullptr); + /** Configure kernel + * + * @note Supported tensor rank: up to 4 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data type supported: All. Data layouts supported: NHWC. + * @param[out] output Destination tensor. Data type supported: F32 + * @param[in] start Coordinates of where to start cropping the image. + * @param[in] end Coordinates of where to end cropping the image. + * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input. + * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. + * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, + Window *output_window = nullptr); + + /** Static function to check if given info will lead to a valid configuration of @ref CLCropKernel + * + * @note Supported tensor rank: up to 4 + * + * @param[in] input Source tensor info. Data type supported: All. Data layouts supported: NHWC. + * @param[in] output Destination tensor info. Data type supported: F32 + * @param[in] start Coordinates of where to start cropping the image. + * @param[in] end Coordinates of where to end cropping the image. + * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input. + * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. + * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr. + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, + Window *output_window = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + Coordinates2D _start; + uint32_t _batch_index; + float _extrapolation_value; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLCROPKERNEL_H */ diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp index 9ba3dc3d8f..d01a00d61e 100644 --- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp +++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h" +#include "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h new file mode 100644 index 0000000000..e0d1322341 --- /dev/null +++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H +#define ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the Deconvolution layer kernel on OpenCL. + */ +class CLDeconvolutionLayerUpsampleKernel : public ICLKernel +{ +public: + /** Constructor */ + CLDeconvolutionLayerUpsampleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDeconvolutionLayerUpsampleKernel(const CLDeconvolutionLayerUpsampleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDeconvolutionLayerUpsampleKernel &operator=(const CLDeconvolutionLayerUpsampleKernel &) = delete; + /** Default Move Constructor. */ + CLDeconvolutionLayerUpsampleKernel(CLDeconvolutionLayerUpsampleKernel &&) = default; + /** Default move assignment operator */ + CLDeconvolutionLayerUpsampleKernel &operator=(CLDeconvolutionLayerUpsampleKernel &&) = default; + /** Default destructor */ + ~CLDeconvolutionLayerUpsampleKernel() = default; + + /** Initialise the kernel's input and output. + * + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Destination tensor. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] info Contains padding and stride information described in @ref PadStrideInfo. + */ + void configure(const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Destination tensor. 
Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] info Contains padding and stride information described in @ref PadStrideInfo. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info); + /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample + * + * @param[in] input Source tensor info. Data types supported: All. + * @param[in] output Destination tensor info. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] info Contains padding and stride information described in @ref PadStrideInfo. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PadStrideInfo &info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + PadStrideInfo _info; + DataLayout _data_layout; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H */ diff --git a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp index 1514d906dc..ea22ec0067 100644 --- a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp +++ b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h" +#include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h new file mode 100644 index 0000000000..ce354fa86f --- /dev/null +++ b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H +#define ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H + +#include "src/core/CL/ICLSimpleKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the OpenCL kernel to be used for reshaping the tensor before returning the result of deconvolution. + * + * The input tensor to this OpenCL kernel is expected to be the result of a @ref CLGEMM operation between the Deconvolution input and the Deconvolution filter. + * + * The input tensor should have the following shape: [filter_width * filter_height * ofms, width, height, batch_size] + * + * The output tensor should have the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size] + * + * For example, given a tensor with dimensions [4, 2, 2] this function returns a tensor with dimensions [1, 4, 4]. + * + */ +class CLDeconvolutionReshapeOutputKernel : public ICLSimpleKernel +{ +public: + /** Default constructor */ + CLDeconvolutionReshapeOutputKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDeconvolutionReshapeOutputKernel(const CLDeconvolutionReshapeOutputKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDeconvolutionReshapeOutputKernel &operator=(const CLDeconvolutionReshapeOutputKernel &) = delete; + /** Allow instances of this class to be moved */ + CLDeconvolutionReshapeOutputKernel(CLDeconvolutionReshapeOutputKernel &&) = default; + /** Allow instances of this class to be moved */ + CLDeconvolutionReshapeOutputKernel &operator=(CLDeconvolutionReshapeOutputKernel &&) = default; + /** Default destructor */ + ~CLDeconvolutionReshapeOutputKernel() = default; + + /** Initialise the kernel's source and destination. + * + * @param[in] input Input tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. + * @param[in] bias Bias tensor to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[out] output Output tensor with the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size] + * Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] input_info Deconvolution input tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] weights_info Deconvolution weights tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported. + */ + void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info); + /** Initialise the kernel's source and destination. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. + * @param[in] bias Bias tensor to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input. 
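+ * @note A minimal usage sketch of the overload without a compile context; @p gemm_result, @p bias and @p reshaped are
+ *       illustrative names for pre-initialised ICLTensor objects, while @p deconv_src and @p weights stand for the
+ *       original deconvolution input and filter.
+ * @code
+ * CLDeconvolutionReshapeOutputKernel reshape;
+ * // For a 2x2 filter the stride must be (2, 2); padding is not supported by this kernel
+ * reshape.configure(&gemm_result, &bias, &reshaped, deconv_src.info(), weights.info(), PadStrideInfo(2, 2, 0, 0));
+ * @endcode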
+ * @param[out] output Output tensor with the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size] + * Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] input_info Deconvolution input tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] weights_info Deconvolution weights tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, + const PadStrideInfo &deconv_info); + + /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionReshapeOutputKernel. + * + * @param[in] input GEMM output tensor info to be reshaped. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. + * @param[in] bias (Optional) Optional bias tensor info to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] output Reshaped output tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] input_info Original input tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] weights_info Original weights tensor info output. Supported data types: same as @p input. Supported data layouts: same as @p input. + * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported. + * + * @return a Status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + bool _add_bias; + const ICLTensor *_bias; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H */ diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp index cb5d727e9b..78adfd202f 100644 --- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h" +#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.h b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.h new file mode 100644 index 0000000000..6c73bd4bf4 --- /dev/null +++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H +#define ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +/** Interface for the depth concatenate kernel. + * The input tensor will be concatenated into the output tensor. + */ +class CLDepthConcatenateLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDepthConcatenateLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthConcatenateLayerKernel(const CLDepthConcatenateLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthConcatenateLayerKernel &operator=(const CLDepthConcatenateLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLDepthConcatenateLayerKernel(CLDepthConcatenateLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLDepthConcatenateLayerKernel &operator=(CLDepthConcatenateLayerKernel &&) = default; + /** Default destructor */ + ~CLDepthConcatenateLayerKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] depth_offset The offset on the Z axis. + * @param[in,out] output Output tensor. Data types supported: Same as @p input. + * + * @note: The output tensor's low two dimensions can't be smaller than the input one's. + * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. 
+ * + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel + * + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in] depth_offset The offset on the Z axis. + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; + +private: + unsigned int _depth_offset; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H */ diff --git a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp index 452a14bd29..c98d66f390 100644 --- a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h" +#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLDepthConvertLayerKernel.h b/src/core/CL/kernels/CLDepthConvertLayerKernel.h new file mode 100644 index 0000000000..8b511c6707 --- /dev/null +++ b/src/core/CL/kernels/CLDepthConvertLayerKernel.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H +#define ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLSimple3DKernel.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the depth conversion kernel. */ +class CLDepthConvertLayerKernel : public ICLSimple3DKernel +{ +public: + /** Set the input and output of the kernel. 
+ * + * Valid conversions Input -> Output : + * + * - QSYMM8_PER_CHANNEL -> QASYMM8 (ATTENTION: it is the user's responsibility to keep track of the quantization info in the TensorInfo meta-data) + * - U8 -> S8, U16, S16, U32, S32, F16, F32 + * - U16 -> U8, S8, S16, U32, S32, F16, F32 + * - S16 -> U8, S8, U16, U32, S32, F16, F32 + * - U32 -> U8, S8, U16, S16, S32, F16, F32 + * - S32 -> U8, S8, U16, S16, U32, F16, F32 + * - F16 -> U8, S8, U16, S16, U32, F32 + * - F32 -> U8, S8, U16, S16, U32, F16 + * + * @param[in] input The input tensor to convert. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32. + * @param[out] output The output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] policy Conversion policy + * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. + */ + void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift); + /** Set the input and output of the kernel. + * + * Valid conversions Input -> Output : + * + * - QSYMM8_PER_CHANNEL -> QASYMM8 (ATTENTION: it is the user's responsibility to keep track of the quantization info in the TensorInfo meta-data) + * - U8 -> S8, U16, S16, U32, S32, F16, F32 + * - U16 -> U8, S8, S16, U32, S32, F16, F32 + * - S16 -> U8, S8, U16, U32, S32, F16, F32 + * - U32 -> U8, S8, U16, S16, S32, F16, F32 + * - S32 -> U8, S8, U16, S16, U32, F16, F32 + * - F16 -> U8, S8, U16, S16, U32, F32 + * - F32 -> U8, S8, U16, S16, U32, F16 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to convert. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32. + * @param[out] output The output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] policy Conversion policy + * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift); + /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConvertLayerKernel + * + * @param[in] input Source tensor info. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32. + * @param[in] output Destination tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] policy Conversion policy + * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift); +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H */ diff --git a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp index c3a40a286a..8946f2a713 100644 --- a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h" +#include "src/core/CL/kernels/CLDepthToSpaceLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.h b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.h new file mode 100644 index 0000000000..1f7f77b569 --- /dev/null +++ b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H +#define ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the depth to space kernel */ +class CLDepthToSpaceLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDepthToSpaceLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthToSpaceLayerKernel(const CLDepthToSpaceLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthToSpaceLayerKernel &operator=(const CLDepthToSpaceLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLDepthToSpaceLayerKernel(CLDepthToSpaceLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLDepthToSpaceLayerKernel &operator=(CLDepthToSpaceLayerKernel &&) = default; + /** Default destructor */ + ~CLDepthToSpaceLayerKernel() = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[out] output Tensor output. Data types supported: same as @p input + * @param[in] block_shape Block shape value. + */ + void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape); + /** Initialise the kernel's inputs and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[out] output Tensor output. Data types supported: same as @p input + * @param[in] block_shape Block shape value. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape); + /** Static function to check if given info will lead to a valid configuration of @ref CLDepthToSpaceLayerKernel. + * + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. + * @param[in] output Tensor output info. Data types supported: same as @p input + * @param[in] block_shape Block shape value. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Source tensor */ + ICLTensor *_output; /**< Destination tensor */ + int32_t _block_shape; /**< Block shape */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp index 7958230aac..c928677f30 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp @@ -21,11 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" @@ -34,6 +33,7 @@ #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "src/core/AccessWindowStatic.h" #include "src/core/CL/CLValidate.h" +#include "src/core/CL/ICLKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h new file mode 100644 index 0000000000..45b5869676 --- /dev/null +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H +#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H + +#include "src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor when the data layout is NCHW. + */ +class CLDepthwiseConvolutionLayer3x3NCHWKernel : public ICLDepthwiseConvolutionLayer3x3Kernel +{ +public: + /** Default constructor */ + CLDepthwiseConvolutionLayer3x3NCHWKernel(); + /** Initialize the function's source, destination, conv and border_size. + * + * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. + * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + */ + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), + const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override; + /** Initialize the function's source, destination, conv and border_size. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. + * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. Data type supported: Same as @p input. 
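+ * @note A minimal usage sketch; @p compile_context is assumed to be a valid CLCompileContext and @p src, @p weights,
+ *       @p biases and @p dst pre-initialised ICLTensor objects in NCHW layout (illustrative names). The optional
+ *       arguments keep their defaults.
+ * @code
+ * CLDepthwiseConvolutionLayer3x3NCHWKernel dwc;
+ * dwc.configure(compile_context, &src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1)); // stride (1, 1), padding (1, 1)
+ * @endcode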
+ * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), + const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override; + /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NCHWKernel + * + * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor info. A 3D tensor with dimensions [3, 3, IFM]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. + * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[in] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. + * @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor info for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). 
Supported data types: S32 + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD, + const Size2D &dilation = Size2D(1U, 1U), const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr); + + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + unsigned int _conv_stride_x; + unsigned int _conv_pad_top; + unsigned int _conv_pad_left; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H */ diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp index 876ef1ec5d..0b673ccdba 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp @@ -21,11 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" @@ -34,6 +33,7 @@ #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "src/core/AccessWindowStatic.h" #include "src/core/CL/CLValidate.h" +#include "src/core/CL/ICLKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h new file mode 100644 index 0000000000..ce0bf5ceb3 --- /dev/null +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H +#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H + +#include "src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor when the data layout is NHWC. + */ +class CLDepthwiseConvolutionLayer3x3NHWCKernel : public ICLDepthwiseConvolutionLayer3x3Kernel +{ +public: + /** Default constructor */ + CLDepthwiseConvolutionLayer3x3NHWCKernel(); + /** Initialize the function's source, destination, conv and border_size. + * + * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. + * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + */ + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), + const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override; + /** Initialize the function's source, destination, conv and border_size. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. + * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), + const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override; + /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel + * + * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor info. A 3D tensor with dimensions [IFM, 3, 3]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. + * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[in] output Destination tensor info. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor info for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). 
Supported data types: S32 + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), + const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + unsigned int _num_planes_processed_per_iteration; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H */ diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp index 4580968d38..748f4a3848 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp @@ -21,11 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" @@ -33,6 +32,7 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "src/core/CL/CLValidate.h" +#include "src/core/CL/ICLKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h new file mode 100644 index 0000000000..325f4e7067 --- /dev/null +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H +#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include "arm_compute/core/KernelDescriptors.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run a MxN depthwise convolution. M and N are respectively the rows and columns of the filter + This kernel assumes that tensor for the weights is NOT reshaped (Native version) */ +class CLDepthwiseConvolutionLayerNativeKernel : public ICLKernel +{ +public: + /** Default Constructor */ + CLDepthwiseConvolutionLayerNativeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseConvolutionLayerNativeKernel(const CLDepthwiseConvolutionLayerNativeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseConvolutionLayerNativeKernel &operator=(const CLDepthwiseConvolutionLayerNativeKernel &) = delete; + /** Allow instances of this class to be moved */ + CLDepthwiseConvolutionLayerNativeKernel(CLDepthwiseConvolutionLayerNativeKernel &&) = default; + /** Allow instances of this class to be moved */ + CLDepthwiseConvolutionLayerNativeKernel &operator=(CLDepthwiseConvolutionLayerNativeKernel &&) = default; + /** Initialize the function's source, destination and parameters + * + * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC + * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, N, M]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8. + * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread + * @param[in] dwc_info Depthwise convolution layer info + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + */ + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info, + const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U), + const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); + /** Initialize the function's source, destination and parameters + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. 
Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC + * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, N, M]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8. + * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread + * @param[in] dwc_info Depthwise convolution layer info + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info, + const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U), + const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); + /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerNativeKernel + * + * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC + * @param[in] weights Weights tensor info. A 3D tensor with dimensions [IFM, N, M]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8. + * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[in] output Destination tensor info. Data type supported: Same as @p input. + * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread + * @param[in] dwc_info Depthwise convolution layer info + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. 
In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const DWCWeightsKernelInfo &dwc_weights_info, + const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U), + const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_weights; + const ICLTensor *_biases; + ICLTensor *_output; + unsigned int _depth_multiplier; + const ICLTensor *_output_multipliers; + const ICLTensor *_output_shifts; + bool _is_quantized; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H */ diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp index 0ff3c520ba..b10c23bde9 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp @@ -21,11 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" @@ -33,6 +32,7 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/AccessWindowStatic.h" #include "src/core/CL/CLValidate.h" +#include "src/core/CL/ICLKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h new file mode 100644 index 0000000000..650fe9a11b --- /dev/null +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
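As a rough usage sketch of the CLDepthwiseConvolutionLayerNativeKernel interface above, the static validate() can be exercised with plain TensorInfo descriptors before any CLTensor is allocated. The shapes, data type, and default-constructed DWC descriptors below are assumptions chosen for illustration (a real caller fills the descriptors with the per-thread element counts it wants), and the include paths assume the code sits inside the library source tree.

#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"

using namespace arm_compute;

Status validate_depthwise_native_fp32()
{
    // NHWC tensors: shapes are expressed channels-first, i.e. [C, W, H].
    TensorInfo input(TensorShape(16U, 32U, 32U), 1, DataType::F32);
    TensorInfo weights(TensorShape(16U, 3U, 3U), 1, DataType::F32); // [IFM, N, M]
    TensorInfo biases(TensorShape(16U), 1, DataType::F32);          // [IFM]
    TensorInfo output(TensorShape(16U, 32U, 32U), 1, DataType::F32);
    input.set_data_layout(DataLayout::NHWC);
    weights.set_data_layout(DataLayout::NHWC);
    output.set_data_layout(DataLayout::NHWC);

    const PadStrideInfo        conv_info(1, 1, 1, 1); // stride 1, pad 1 -> same spatial size for a 3x3 filter
    const DWCWeightsKernelInfo dwc_weights_info{};    // left at defaults for this sketch
    const DWCKernelInfo        dwc_info{};            // left at defaults for this sketch

    return CLDepthwiseConvolutionLayerNativeKernel::validate(&input, &weights, &biases, &output,
                                                             dwc_weights_info, dwc_info, conv_info);
}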
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H +#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to reshape the weights of depthwise convolution. */ +class CLDepthwiseConvolutionLayerReshapeWeightsKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDepthwiseConvolutionLayerReshapeWeightsKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseConvolutionLayerReshapeWeightsKernel(const CLDepthwiseConvolutionLayerReshapeWeightsKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseConvolutionLayerReshapeWeightsKernel &operator=(const CLDepthwiseConvolutionLayerReshapeWeightsKernel &) = delete; + /** Default Move Constructor. */ + CLDepthwiseConvolutionLayerReshapeWeightsKernel(CLDepthwiseConvolutionLayerReshapeWeightsKernel &&) = default; + /** Default move assignment operator */ + CLDepthwiseConvolutionLayerReshapeWeightsKernel &operator=(CLDepthwiseConvolutionLayerReshapeWeightsKernel &&) = default; + + /** Initialize the function's source and destination. + * + * @param[in] input The input tensor of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC + * @param[out] output The output tensor of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p weights. + * @param[in] info Depthwise convolution information to reshape the input tensor. + */ + void configure(const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info); + /** Initialize the function's source and destination. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC + * @param[out] output The output tensor of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p weights. + * @param[in] info Depthwise convolution information to reshape the input tensor. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info); + + /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel + * + * @param[in] input The input tensor info of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC + * @param[in] output The output tensor info of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p weights. + * @param[in] info Depthwise convolution information to reshape the input tensor. 
+ * + * @return a Status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const DepthwiseConvolutionReshapeInfo &info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + + void configure_dot_product(const DepthwiseConvolutionReshapeInfo &info); + void configure_generic(const DepthwiseConvolutionReshapeInfo &info); +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H */ diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp index e2c49fbf66..3723c651fe 100644 --- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h" +#include "src/core/CL/kernels/CLDequantizationLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.h b/src/core/CL/kernels/CLDequantizationLayerKernel.h new file mode 100644 index 0000000000..5579b5bc71 --- /dev/null +++ b/src/core/CL/kernels/CLDequantizationLayerKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H +#define ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the dequantization layer kernel. */ +class CLDequantizationLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDequantizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDequantizationLayerKernel(const CLDequantizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDequantizationLayerKernel &operator=(const CLDequantizationLayerKernel &) = delete; + /** Default Move Constructor. 
*/ + CLDequantizationLayerKernel(CLDequantizationLayerKernel &&) = default; + /** Default move assignment operator */ + CLDequantizationLayerKernel &operator=(CLDequantizationLayerKernel &&) = default; + /** Default destructor */ + ~CLDequantizationLayerKernel() = default; + /** Set the input, output, min and max. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[out] output Destination tensor. Data types supported: F16/F32. + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Set the input, output, min and max. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[out] output Destination tensor. Data types supported: F16/F32. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayerKernel + * + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[in] output Output tensor info. Data types supported: F16/F32. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLDerivativeKernel.cpp b/src/core/CL/kernels/CLDerivativeKernel.cpp index 659a7cb209..5ff11362cc 100644 --- a/src/core/CL/kernels/CLDerivativeKernel.cpp +++ b/src/core/CL/kernels/CLDerivativeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLDerivativeKernel.h" +#include "src/core/CL/kernels/CLDerivativeKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLDerivativeKernel.h b/src/core/CL/kernels/CLDerivativeKernel.h new file mode 100644 index 0000000000..14dd05d084 --- /dev/null +++ b/src/core/CL/kernels/CLDerivativeKernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
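A minimal, assumed check of the dequantization interface documented above: a QASYMM8 input with an arbitrary quantization scale and offset is paired with an F32 output and handed to the static validate(). This is a sketch only, not taken from the library's tests, and the include path assumes in-tree code.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLDequantizationLayerKernel.h"

using namespace arm_compute;

bool can_dequantize_qasymm8()
{
    TensorInfo input(TensorShape(64U, 16U), 1, DataType::QASYMM8);
    input.set_quantization_info(QuantizationInfo(0.5f, 10)); // scale, zero-point (arbitrary values)
    const TensorInfo output(TensorShape(64U, 16U), 1, DataType::F32);

    // Status converts to true when validation raised no error.
    return static_cast<bool>(CLDequantizationLayerKernel::validate(&input, &output));
}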
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLDERIVATIVEKERNEL_H +#define ARM_COMPUTE_CLDERIVATIVEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the derivative kernel. */ +class CLDerivativeKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDerivativeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDerivativeKernel(const CLDerivativeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDerivativeKernel &operator=(const CLDerivativeKernel &) = delete; + /** Allow instances of this class to be moved */ + CLDerivativeKernel(CLDerivativeKernel &&) = default; + /** Allow instances of this class to be moved */ + CLDerivativeKernel &operator=(CLDerivativeKernel &&) = default; + /** Default destructor */ + ~CLDerivativeKernel() = default; + /** Initialise the kernel's sources, destination and border + * + * @note At least one of output_x or output_y must be set + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + /** Initialise the kernel's sources, destination and border + * + * @note At least one of output_x or output_y must be set + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; /**< Input tensor */ + ICLTensor *_output_x; /**< Output tensor - Derivate along the X direction */ + ICLTensor *_output_y; /**< Output tensor - Derivate along the Y direction */ + bool _run_derivative_x; /**< Do we need to run Derivative X ? */ + bool _run_derivative_y; /**< Do we need to run Derivative Y ? */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLDERIVATIVEKERNEL_H */ diff --git a/src/core/CL/kernels/CLDilateKernel.cpp b/src/core/CL/kernels/CLDilateKernel.cpp index 1e59c349e7..cac5bc1c72 100644 --- a/src/core/CL/kernels/CLDilateKernel.cpp +++ b/src/core/CL/kernels/CLDilateKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
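To make the X/Y outputs of the derivative kernel above concrete, below is a plain scalar reference of a signed central difference along X, writing S16 from a U8 image. This is an illustrative definition only and is not claimed to match the OpenCL kernel's exact filter taps or scaling; border pixels are simply skipped, loosely mirroring the undefined-border mode.

#include <cstdint>
#include <vector>

// First-order derivative along X for a width x height U8 image, producing S16.
std::vector<int16_t> derivative_x_reference(const std::vector<uint8_t> &src, int width, int height)
{
    std::vector<int16_t> dst(src.size(), 0);
    for(int y = 0; y < height; ++y)
    {
        for(int x = 1; x < width - 1; ++x)
        {
            dst[y * width + x] = static_cast<int16_t>(src[y * width + x + 1]) -
                                 static_cast<int16_t>(src[y * width + x - 1]);
        }
    }
    return dst;
}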
*/ -#include "arm_compute/core/CL/kernels/CLDilateKernel.h" +#include "src/core/CL/kernels/CLDilateKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLDilateKernel.h b/src/core/CL/kernels/CLDilateKernel.h new file mode 100644 index 0000000000..591ec8ccfc --- /dev/null +++ b/src/core/CL/kernels/CLDilateKernel.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLDILATEKERNEL_H +#define ARM_COMPUTE_CLDILATEKERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the dilate kernel. + * + */ +class CLDilateKernel : public ICLSimple2DKernel +{ +public: + /**Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /**Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLDILATEKERNEL_H */ diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp index 161b221e81..a642eabc4e 100644 --- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp +++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
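For context on the U8-only dilate kernel above, here is a scalar sketch of 3x3 grayscale dilation: each output pixel becomes the maximum of its 3x3 neighbourhood. It is a reference definition, not the kernel's implementation; the one-pixel border is left untouched, loosely corresponding to an undefined border mode.

#include <algorithm>
#include <cstdint>
#include <vector>

// 3x3 grayscale dilation on a width x height U8 image.
std::vector<uint8_t> dilate3x3_reference(const std::vector<uint8_t> &src, int width, int height)
{
    std::vector<uint8_t> dst(src);
    for(int y = 1; y < height - 1; ++y)
    {
        for(int x = 1; x < width - 1; ++x)
        {
            uint8_t max_val = 0;
            for(int dy = -1; dy <= 1; ++dy)
            {
                for(int dx = -1; dx <= 1; ++dx)
                {
                    max_val = std::max(max_val, src[(y + dy) * width + (x + dx)]);
                }
            }
            dst[y * width + x] = max_val;
        }
    }
    return dst;
}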
*/ -#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h" +#include "src/core/CL/kernels/CLDirectConvolutionLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.h b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.h new file mode 100644 index 0000000000..5cd674f631 --- /dev/null +++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H +#define ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the direct convolution kernel. + */ +class CLDirectConvolutionLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDirectConvolutionLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDirectConvolutionLayerKernel(const CLDirectConvolutionLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDirectConvolutionLayerKernel &operator=(const CLDirectConvolutionLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLDirectConvolutionLayerKernel(CLDirectConvolutionLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLDirectConvolutionLayerKernel &operator=(CLDirectConvolutionLayerKernel &&) = default; + /** Default destructor */ + ~CLDirectConvolutionLayerKernel() = default; + /** Set the input, weights, biases and output tensors. + * + * @note: DirectConvolution only works in the following configurations: + * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 + * 3x3 convolution with stride_x = 1/2, stride_y = 1/2 + * 5x5 convolution with stride_x = 1/2, stride_y = 1/2 + * 9x9 convolution with stride_x = 1/2, stride_y = 1/2 + * + * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. 
+ * The 3rd dimension must be the same as the input's volume 3rd dimension. + * Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Biases are 1D tensor with dimension [OFM]. + * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type + * @param[out] output Output tensor. + * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + */ + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info); + /** Set the input, weights, biases and output tensors. + * + * @note: DirectConvolution only works in the following configurations: + * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 + * 3x3 convolution with stride_x = 1/2, stride_y = 1/2 + * 5x5 convolution with stride_x = 1/2, stride_y = 1/2 + * 9x9 convolution with stride_x = 1/2, stride_y = 1/2 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * The 3rd dimension must be the same as the input's volume 3rd dimension. + * Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Biases are 1D tensor with dimension [OFM]. + * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type + * @param[out] output Output tensor. + * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLDirectConvolutionLayerKernel + * + * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * The 3rd dimension must be the same as the input's volume 3rd dimension. + * Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Biases are 1D tensor with dimension [OFM]. + * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type. + * @param[in] output Output tensor. + * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] target Target GPU architecture. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const GPUTarget target); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +public: + const ICLTensor *_input; + const ICLTensor *_biases; + const ICLTensor *_weights; + ICLTensor *_output; + DataLayout _data_layout; + BorderSize _border_size; + int _conv_stride_x; + int _conv_stride_y; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp index bff0db07a0..38a7f1bae1 100644 --- a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp +++ b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h" +#include "src/core/CL/kernels/CLElementWiseUnaryLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.h b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.h new file mode 100644 index 0000000000..95b5872796 --- /dev/null +++ b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H +#define ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" +#include "src/core/CL/ICLSimpleKernel.h" + +namespace arm_compute +{ +/** Interface for the elementwise unary operator */ +class CLElementWiseUnaryLayerKernel : public ICLKernel +{ +public: + /** Initialise the kernel's inputs, output. + * + * @param[in] input First tensor input info. Data types supported: F16/F32. + * @param[out] output Output tensor info. Data types supported: Same as @p input. + * @param[in] op Element wise unary operation to perform. + */ + void configure(const ITensorInfo *input, ITensorInfo *output, const ElementWiseUnary &op); + /** Initialise the kernel's inputs, output. 
+ * + * @param[in] compile_context The compile context to be used. + * @param[in] input First tensor input info. Data types supported: F16/F32. + * @param[out] output Output tensor info. Data types supported: Same as @p input. + * @param[in] op Element wise unary operation to perform. + */ + void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, const ElementWiseUnary &op); + /** Static function to check if given info will lead to a valid configuration of @ref CLElementWiseUnaryLayerKernel + * + * @param[in] input First tensor input info. Data types supported: F16/F32. + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * @param[in] op Element wise unary operation to perform. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ElementWiseUnary &op); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp index da28f3d886..896ee119c1 100644 --- a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp +++ b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h" +#include "src/core/CL/kernels/CLElementwiseOperationKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.h b/src/core/CL/kernels/CLElementwiseOperationKernel.h new file mode 100644 index 0000000000..75030cf3a3 --- /dev/null +++ b/src/core/CL/kernels/CLElementwiseOperationKernel.h @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
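Unlike most kernels in this patch, the element-wise unary kernel above is configured with ITensorInfo pointers and executed through run_op() with an ITensorPack. As a small assumed sketch (shapes chosen arbitrarily, in-tree include path assumed), validate() can be used to check whether an element-wise EXP over an F32 tensor is a supported configuration.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLElementWiseUnaryLayerKernel.h"

using namespace arm_compute;

bool can_run_exp_f32()
{
    const TensorInfo input(TensorShape(128U, 8U), 1, DataType::F32);
    const TensorInfo output(TensorShape(128U, 8U), 1, DataType::F32);
    return static_cast<bool>(CLElementWiseUnaryLayerKernel::validate(&input, &output, ElementWiseUnary::EXP));
}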
+ */ +#ifndef ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H +#define ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for an element-wise operation kernel + * + * Element-wise operation is computed by: + * @f[ output(x,y) = OP(input1(x,y), input2(x,y))@f] + * + */ +class CLElementwiseOperationKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLElementwiseOperationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLElementwiseOperationKernel(const CLElementwiseOperationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLElementwiseOperationKernel &operator=(const CLElementwiseOperationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLElementwiseOperationKernel(CLElementwiseOperationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLElementwiseOperationKernel &operator=(CLElementwiseOperationKernel &&) = default; + /** Default destructor */ + ~CLElementwiseOperationKernel() = default; + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; + +protected: + /** The name of the operation */ + virtual std::string name() = 0; + + /** Initialise the kernel's output. + * + * @param[in] input1 First tensor input info. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * + * @return a pair of Status and Window + */ + virtual std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) = 0; + + /** Generate the build options for the specific kernel + * + * @return a CLBuildOptions struct + */ + virtual CLBuildOptions generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) = 0; + + /** Generate the identifier for tuning + * + * @return a string + */ + virtual std::string generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) = 0; + + /** Common configure function for element-wise operators with no additional options (e.g., Div, Min, Max, SquaredDiff) + * + */ + void configure_common(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output); + /** Common configure function for element-wise operators with no additional options (e.g., Div, Min, Max, SquaredDiff) + * + */ + void configure_common(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output); + + ActivationLayerInfo _act_info; + +private: + const ITensorInfo *_input1; /**< Source tensor info 1 */ + const ITensorInfo *_input2; /**< Source tensor info 2 */ + ITensorInfo *_output; /**< Destination tensor info */ +}; + +/** Addition operation */ +class CLSaturatedArithmeticOperationKernel : public CLElementwiseOperationKernel +{ +public: + CLSaturatedArithmeticOperationKernel() + : CLElementwiseOperationKernel(), _policy(), _op() + { + } + + /** Initialise the kernel's inputs and output. + * + * @param[in] op Arithmetic operation to be executed. + * @param[in] input1 First tensor input info.
Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * @param[in] policy Policy to use to handle overflow. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ConvertPolicy &policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Initialise the kernel's inputs and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] op Arithmetic operation to be executed. + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * @param[in] policy Policy to use to handle overflow. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const CLCompileContext &compile_context, ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ConvertPolicy &policy, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel + * + * @param[in] op Arithmetic operation to be executed. + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * @param[in] policy Policy to use to handle overflow. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * + * @return a Status + */ + static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ConvertPolicy &policy, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + +protected: + // Inherited methods overridden: + std::string name() override; + std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) override; + CLBuildOptions generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) override; + std::string generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) override; + +private: + ConvertPolicy _policy; + ArithmeticOperation _op; +}; + +class CLArithmeticOperationKernel : public CLElementwiseOperationKernel +{ +public: + CLArithmeticOperationKernel() + : CLElementwiseOperationKernel(), _op() + { + } + + /** Initialise the kernel's inputs and output. + * + * @param[in] op Arithmetic operation to be executed. + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info.
Data types supported: Same as @p input1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Initialise the kernel's inputs and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] op Arithmetic operation to be executed. + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const CLCompileContext &compile_context, ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel + * + * @param[in] op Arithmetic operation to be executed. + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * + * @return a Status + */ + static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + +protected: + // Inherited methods overridden: + std::string name() override; + std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) override; + CLBuildOptions generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) override; + std::string generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) override; + +private: + ArithmeticOperation _op; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H */ diff --git a/src/core/CL/kernels/CLErodeKernel.cpp b/src/core/CL/kernels/CLErodeKernel.cpp index 29a32979a3..f6d98a5488 100644 --- a/src/core/CL/kernels/CLErodeKernel.cpp +++ b/src/core/CL/kernels/CLErodeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLErodeKernel.h" +#include "src/core/CL/kernels/CLErodeKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLErodeKernel.h b/src/core/CL/kernels/CLErodeKernel.h new file mode 100644 index 0000000000..4da97ae358 --- /dev/null +++ b/src/core/CL/kernels/CLErodeKernel.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016-2020 Arm Limited.
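As an illustration of the saturated-arithmetic variant documented above, the sketch below (assumed shapes and types, not taken from the library's tests, in-tree include path assumed) asks validate() whether a saturating S16 addition with no fused activation is a legal configuration.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLElementwiseOperationKernel.h"

using namespace arm_compute;

bool can_add_s16_saturated()
{
    const TensorInfo a(TensorShape(256U), 1, DataType::S16);
    const TensorInfo b(TensorShape(256U), 1, DataType::S16);
    const TensorInfo out(TensorShape(256U), 1, DataType::S16);
    return static_cast<bool>(CLSaturatedArithmeticOperationKernel::validate(ArithmeticOperation::ADD, &a, &b, &out,
                                                                            ConvertPolicy::SATURATE));
}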
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLERODEKERNEL_H +#define ARM_COMPUTE_CLERODEKERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the erode kernel. + * + */ +class CLErodeKernel : public ICLSimple2DKernel +{ +public: + /**Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /**Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLERODEKERNEL_H */ diff --git a/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp b/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp index 0478f550f9..922e50aa73 100644 --- a/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp +++ b/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h" +#include "src/core/CL/kernels/CLFFTDigitReverseKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLFFTDigitReverseKernel.h b/src/core/CL/kernels/CLFFTDigitReverseKernel.h new file mode 100644 index 0000000000..2e2f1bdff4 --- /dev/null +++ b/src/core/CL/kernels/CLFFTDigitReverseKernel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
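Erode is the morphological dual of the dilate kernel shown earlier: the same 3x3 neighbourhood scan, but taking the minimum instead of the maximum. A scalar reference follows (illustrative only, not the kernel's implementation).

#include <algorithm>
#include <cstdint>
#include <vector>

// 3x3 grayscale erosion on a width x height U8 image; the one-pixel border is left untouched.
std::vector<uint8_t> erode3x3_reference(const std::vector<uint8_t> &src, int width, int height)
{
    std::vector<uint8_t> dst(src);
    for(int y = 1; y < height - 1; ++y)
    {
        for(int x = 1; x < width - 1; ++x)
        {
            uint8_t min_val = 255;
            for(int dy = -1; dy <= 1; ++dy)
            {
                for(int dx = -1; dx <= 1; ++dx)
                {
                    min_val = std::min(min_val, src[(y + dy) * width + (x + dx)]);
                }
            }
            dst[y * width + x] = min_val;
        }
    }
    return dst;
}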
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H +#define ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include "arm_compute/core/KernelDescriptors.h" + +namespace arm_compute +{ +// Forward declarations +class ICLTensor; + +/** Interface for the digit reverse operation kernel. */ +class CLFFTDigitReverseKernel : public ICLKernel +{ +public: + /** Constructor */ + CLFFTDigitReverseKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFFTDigitReverseKernel(const CLFFTDigitReverseKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFFTDigitReverseKernel &operator=(const CLFFTDigitReverseKernel &) = delete; + /** Default Move Constructor. */ + CLFFTDigitReverseKernel(CLFFTDigitReverseKernel &&) = default; + /** Default move assignment operator */ + CLFFTDigitReverseKernel &operator=(CLFFTDigitReverseKernel &&) = default; + /** Default destructor */ + ~CLFFTDigitReverseKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] idx Digit reverse index tensor. Data type supported: U32 + * @param[in] config Kernel configuration. + */ + void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] idx Digit reverse index tensor. Data type supported: U32 + * @param[in] config Kernel configuration. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config); + /** Static function to check if given info will lead to a valid configuration of @ref CLFFTDigitReverseKernel + * + * @param[in] input Source tensor info. Data types supported: F32. + * @param[in] output Destination tensor info. Data type supported: same as @p input + * @param[in] idx Digit reverse index tensor info. Data type supported: U32 + * @param[in] config Kernel configuration. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + const ICLTensor *_idx; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H */ diff --git a/src/core/CL/kernels/CLFFTRadixStageKernel.cpp b/src/core/CL/kernels/CLFFTRadixStageKernel.cpp index 7b17a227e1..0f06640b64 100644 --- a/src/core/CL/kernels/CLFFTRadixStageKernel.cpp +++ b/src/core/CL/kernels/CLFFTRadixStageKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h" +#include "src/core/CL/kernels/CLFFTRadixStageKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLFFTRadixStageKernel.h b/src/core/CL/kernels/CLFFTRadixStageKernel.h new file mode 100644 index 0000000000..c3cc510bdd --- /dev/null +++ b/src/core/CL/kernels/CLFFTRadixStageKernel.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H +#define ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include "arm_compute/core/KernelDescriptors.h" + +#include <set> + +namespace arm_compute +{ +// Forward declarations +class ICLTensor; + +/** Interface for the FFT radix stage kernel. */ +class CLFFTRadixStageKernel : public ICLKernel +{ +public: + /** Constructor */ + CLFFTRadixStageKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFFTRadixStageKernel(const CLFFTRadixStageKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFFTRadixStageKernel &operator=(const CLFFTRadixStageKernel &) = delete; + /** Default Move Constructor. */ + CLFFTRadixStageKernel(CLFFTRadixStageKernel &&) = default; + /** Default move assignment operator */ + CLFFTRadixStageKernel &operator=(CLFFTRadixStageKernel &&) = default; + /** Default destructor */ + ~CLFFTRadixStageKernel() = default; + /** Set the input and output tensors.
+ * + * @note If the output tensor is nullptr, the FFT will be performed in-place + * + * @param[in,out] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. Can be nullptr. Data type supported: same as @p input + * @param[in] config FFT descriptor metadata. + */ + void configure(ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config); + /** Set the input and output tensors. + * + * @note If the output tensor is nullptr, the FFT will be performed in-place + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. Can be nullptr. Data type supported: same as @p input + * @param[in] config FFT descriptor metadata. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config); + /** Static function to check if given info will lead to a valid configuration of @ref CLFFTRadixStageKernel + * + * @param[in] input Source tensor info. Data types supported: F32. + * @param[in] output Destination tensor info. Can be nullptr. Data type supported: same as @p input + * @param[in] config FFT descriptor metadata. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelInfo &config); + /** Returns the radices supported by the FFT kernel + * + * @return A set of supported radices + */ + static std::set<unsigned int> supported_radix(); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_input; + ICLTensor *_output; + bool _run_in_place; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H */ diff --git a/src/core/CL/kernels/CLFFTScaleKernel.cpp b/src/core/CL/kernels/CLFFTScaleKernel.cpp index 49fcbb6c7b..4dbe8d2e86 100644 --- a/src/core/CL/kernels/CLFFTScaleKernel.cpp +++ b/src/core/CL/kernels/CLFFTScaleKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h" +#include "src/core/CL/kernels/CLFFTScaleKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLFFTScaleKernel.h b/src/core/CL/kernels/CLFFTScaleKernel.h new file mode 100644 index 0000000000..cb007e5307 --- /dev/null +++ b/src/core/CL/kernels/CLFFTScaleKernel.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLFFTSCALEKERNEL_H +#define ARM_COMPUTE_CLFFTSCALEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include "arm_compute/core/KernelDescriptors.h" + +namespace arm_compute +{ +// Forward declarations +class ICLTensor; + +/** Interface for the inverse fft scale kernel. */ +class CLFFTScaleKernel : public ICLKernel +{ +public: + /** Constructor */ + CLFFTScaleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFFTScaleKernel(const CLFFTScaleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFFTScaleKernel &operator=(const CLFFTScaleKernel &) = delete; + /** Default Move Constructor. */ + CLFFTScaleKernel(CLFFTScaleKernel &&) = default; + /** Default move assignment operator */ + CLFFTScaleKernel &operator=(CLFFTScaleKernel &&) = default; + /** Default destructor */ + ~CLFFTScaleKernel() = default; + /** Set the input and output tensors. + * + * @param[in,out] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] config Kernel configuration + */ + void configure(ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] config Kernel configuration + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config); + /** Static function to check if given info will lead to a valid configuration of @ref CLFFTScaleKernel + * + * @param[in] input Source tensor info. Data types supported: F32. + * @param[in] output Destination tensor info. Data type supported: same as @p input + * @param[in] config Kernel configuration + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTScaleKernelInfo &config); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_input; + ICLTensor *_output; + bool _run_in_place; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLFFTSCALEKERNEL_H */ diff --git a/src/core/CL/kernels/CLFastCornersKernel.cpp b/src/core/CL/kernels/CLFastCornersKernel.cpp index ebdfd2741f..7481fd1c27 100644 --- a/src/core/CL/kernels/CLFastCornersKernel.cpp +++ b/src/core/CL/kernels/CLFastCornersKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h" +#include "src/core/CL/kernels/CLFastCornersKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLFastCornersKernel.h b/src/core/CL/kernels/CLFastCornersKernel.h new file mode 100644 index 0000000000..0c1b564c2f --- /dev/null +++ b/src/core/CL/kernels/CLFastCornersKernel.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLFASTCORNERSKERNEL_H +#define ARM_COMPUTE_CLFASTCORNERSKERNEL_H + +#include "arm_compute/core/CL/ICLArray.h" +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +#include <cstdint> + +namespace cl +{ +class Buffer; +} + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** CL kernel to perform fast corners */ +class CLFastCornersKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLFastCornersKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFastCornersKernel(const CLFastCornersKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFastCornersKernel &operator=(const CLFastCornersKernel &) = delete; + /** Allow instances of this class to be moved */ + CLFastCornersKernel(CLFastCornersKernel &&) = default; + /** Allow instances of this class to be moved */ + CLFastCornersKernel &operator=(CLFastCornersKernel &&) = default; + /** Default destructor */ + ~CLFastCornersKernel() = default; + + /** Initialise the kernel. + * + * @param[in] input Source image. Data types supported: U8. + * @param[out] output Output image. Data types supported: U8. + * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. + * @param[in] non_max_suppression True if non-maxima suppression is applied, false otherwise. + * @param[in] border_mode Strategy to use for borders. + */ + void configure(const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode); + /** Initialise the kernel. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source image. Data types supported: U8. + * @param[out] output Output image. Data types supported: U8. + * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. + * @param[in] non_max_suppression True if non-maxima suppression is applied, false otherwise. + * @param[in] border_mode Strategy to use for borders.
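+ *
+ * @note Illustrative call below; the image objects src and dst are assumed to be pre-allocated U8
+ *       images (ICLImage), and the threshold value is an arbitrary example, not a value mandated by
+ *       the library.
+ * @code
+ * CLFastCornersKernel fast_corners;
+ * // threshold of 20 intensity levels, non-maxima suppression enabled, undefined border handling
+ * fast_corners.configure(&src, &dst, 20.0f, true, BorderMode::UNDEFINED);
+ * @endcode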
+ */ + void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode); + + // Inherited methods overridden + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLImage *_input; + ICLImage *_output; +}; + +/** CL kernel to copy keypoints information to ICLKeyPointArray and count the number of key points */ +class CLCopyToArrayKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLCopyToArrayKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLCopyToArrayKernel(const CLCopyToArrayKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLCopyToArrayKernel &operator=(const CLCopyToArrayKernel &) = delete; + /** Allow instances of this class to be moved */ + CLCopyToArrayKernel(CLCopyToArrayKernel &&) = default; + /** Allow instances of this class to be moved */ + CLCopyToArrayKernel &operator=(CLCopyToArrayKernel &&) = default; + /** Default destructor */ + ~CLCopyToArrayKernel() = default; + + /** Initialise the kernel. + * + * @param[in] input Source image. Data types supported: U8. + * @param[in] update_number Flag to indicate whether we need to update the number of corners + * @param[out] corners Array of keypoints to store the results. + * @param[out] num_buffers CL buffer to store the number of key points written to @p corners. + */ + void configure(const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers); + /** Initialise the kernel. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source image. Data types supported: U8. + * @param[in] update_number Flag to indicate whether we need to update the number of corners + * @param[out] corners Array of keypoints to store the results. + * @param[out] num_buffers CL buffer to store the number of key points written to @p corners. + */ + void configure(const CLCompileContext &compile_context, const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLImage *_input; /**< source image */ + ICLKeyPointArray *_corners; /**< destination array */ + cl::Buffer *_num_buffer; /**< CL memory to record number of key points in the array */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLFASTCORNERSKERNEL_H */ diff --git a/src/core/CL/kernels/CLFillBorderKernel.cpp b/src/core/CL/kernels/CLFillBorderKernel.cpp index e92619a242..5d77c291d7 100644 --- a/src/core/CL/kernels/CLFillBorderKernel.cpp +++ b/src/core/CL/kernels/CLFillBorderKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLFillBorderKernel.h b/src/core/CL/kernels/CLFillBorderKernel.h new file mode 100644 index 0000000000..7951f48171 --- /dev/null +++ b/src/core/CL/kernels/CLFillBorderKernel.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2016-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLFILLBORDERKERNEL_H +#define ARM_COMPUTE_CLFILLBORDERKERNEL_H + +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for filling the border of a kernel */ +class CLFillBorderKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLFillBorderKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFillBorderKernel(const CLFillBorderKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFillBorderKernel &operator=(const CLFillBorderKernel &) = delete; + /** Allow instances of this class to be moved */ + CLFillBorderKernel(CLFillBorderKernel &&) = default; + /** Allow instances of this class to be moved */ + CLFillBorderKernel &operator=(CLFillBorderKernel &&) = default; + /** Default destructor */ + ~CLFillBorderKernel() = default; + + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] tensor Tensor to process. Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32. + * @param[in] border_size Size of the border to fill in elements. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + /** Initialise the kernel's input, output and border mode. + * + * @param[in,out] tensor Tensor to process. Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32. + * @param[in] border_size Size of the border to fill in elements. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used.
+ * @param[in,out] tensor Tensor to process. Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32. + * @param[in] border_size Size of the border to fill in elements. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + + /** Function to set the constant value on fill border kernel depending on type. + * + * @param[in] idx Index of the kernel argument to set. + * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT. + */ + template <class T> + void set_constant_border(unsigned int idx, const PixelValue &constant_border_value); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; + void run(const Window &window, cl::CommandQueue &queue) override; + bool is_parallelisable() const override; + +private: + ICLTensor *_tensor; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLFILLBORDERKERNEL_H */ diff --git a/src/core/CL/kernels/CLFlattenLayerKernel.cpp b/src/core/CL/kernels/CLFlattenLayerKernel.cpp index 590fcee6fd..b3f84b6928 100644 --- a/src/core/CL/kernels/CLFlattenLayerKernel.cpp +++ b/src/core/CL/kernels/CLFlattenLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h" +#include "src/core/CL/kernels/CLFlattenLayerKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" diff --git a/src/core/CL/kernels/CLFlattenLayerKernel.h b/src/core/CL/kernels/CLFlattenLayerKernel.h new file mode 100644 index 0000000000..2471cf2e4a --- /dev/null +++ b/src/core/CL/kernels/CLFlattenLayerKernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef ARM_COMPUTE_CLFLATTENLAYERKERNEL_H +#define ARM_COMPUTE_CLFLATTENLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL interface for the flatten kernel.*/ +class CLFlattenLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLFlattenLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFlattenLayerKernel(const CLFlattenLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFlattenLayerKernel &operator=(const CLFlattenLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLFlattenLayerKernel(CLFlattenLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLFlattenLayerKernel &operator=(CLFlattenLayerKernel &&) = default; + /** Set the input and output of the kernel. + * + * @param[in] input First input tensor to flatten with at least 3 dimensions. + * The dimensions above the third will be interpreted as batches. Data types supported: All. + * @param[out] output Output tensor with shape [w*h*d, input_batches] where: + * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Set the input and output of the kernel. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input First input tensor to flatten with at least 3 dimensions. + * The dimensions above the third will be interpreted as batches. Data types supported: All. + * @param[out] output Output tensor with shape [w*h*d, input_batches] where: + * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLFlattenLayerKernel + * + * @param[in] input First input tensor to flatten with at least 3 dimensions. + * The dimensions above the third will be interpreted as batches. Data types supported: All. + * @param[in] output Output tensor with shape [w*h*d, input_batches] where: + * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +public: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLFLATTENLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLFloorKernel.cpp b/src/core/CL/kernels/CLFloorKernel.cpp index 8884f3fe36..2af0089bf0 100644 --- a/src/core/CL/kernels/CLFloorKernel.cpp +++ b/src/core/CL/kernels/CLFloorKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/CL/kernels/CLFloorKernel.h" +#include "src/core/CL/kernels/CLFloorKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLFloorKernel.h b/src/core/CL/kernels/CLFloorKernel.h new file mode 100644 index 0000000000..f5635141e4 --- /dev/null +++ b/src/core/CL/kernels/CLFloorKernel.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLFLOORKERNEL_H +#define ARM_COMPUTE_CLFLOORKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform a floor operation */ +class CLFloorKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLFloorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFloorKernel(const CLFloorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFloorKernel &operator=(const CLFloorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLFloorKernel(CLFloorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLFloorKernel &operator=(CLFloorKernel &&) = default; + /** Default destructor */ + ~CLFloorKernel() = default; + /** Set the source, destination of the kernel + * + * @param[in] input Source tensor. Data type supported: F16/F32. + * @param[out] output Destination tensor. Same as @p input + */ + void configure(const ICLTensor *input, ICLTensor *output); + + /** Set the source, destination of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data type supported: F16/F32. + * @param[out] output Destination tensor. Same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); + + /** Static function to check if given info will lead to a valid configuration of @ref CLFloorKernel + * + * @param[in] input Source tensor info. Data type supported: F16/F32. + * @param[in] output Destination tensor info. 
Same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLFLOORKERNEL_H */ diff --git a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp index 357231940b..2116239080 100644 --- a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp +++ b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h" +#include "src/core/CL/kernels/CLFuseBatchNormalizationKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.h b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.h new file mode 100644 index 0000000000..78b1e74cab --- /dev/null +++ b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.h @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H +#define ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ICLTensor; + +/** OpenCL kernel to fuse the batch normalization node to a preceding convolution node */ +class CLFuseBatchNormalizationKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLFuseBatchNormalizationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFuseBatchNormalizationKernel(const CLFuseBatchNormalizationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFuseBatchNormalizationKernel &operator=(const CLFuseBatchNormalizationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLFuseBatchNormalizationKernel(CLFuseBatchNormalizationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLFuseBatchNormalizationKernel &operator=(CLFuseBatchNormalizationKernel &&) = default; + /** Default destructor */ + ~CLFuseBatchNormalizationKernel() = default; + /** Set the source, destination of the kernel + * + * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC + * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights + * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights + * @param[out] fused_weights Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights + * @param[out] fused_bias Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights + * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights + * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights + * @note if nullptr, bn_beta is set to 0.0 + * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights + * @note if nullptr, bn_gamma is set to 1.0 + * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. + * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. + */ + void configure(const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias, + const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr, + float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); + /** Set the source, destination of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC + * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights + * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights + * @param[out] fused_weights Output fused weights tensor. 
It can be a nullptr in case of in-place computation. Same as @p input_weights + * @param[out] fused_bias Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights + * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights + * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights + * @note if nullptr, bn_beta is set to 0.0 + * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights + * @note if nullptr, bn_gamma is set to 1.0 + * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. + * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias, + const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr, + float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); + /** Static function to check if given info will lead to a valid configuration of @ref CLFuseBatchNormalizationKernel + * + * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC + * @param[in] bn_mean Batch normalization layer mean tensor info. Same as @p input_weights + * @param[in] bn_var Batch normalization layer variance tensor info. Same as @p input_weights + * @param[in] fused_weights Output fused weights tensor info. It can be a nullptr in case of in-place computation. Same as @p input_weights + * @param[in] fused_bias Output fused bias tensor info. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights + * @param[in] input_bias (Optional) Input bias tensor info for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights + * @param[in] bn_beta (Optional) Batch normalization layer beta tensor info. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights + * @note if nullptr, bn_beta is set to 0.0 + * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor info. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights + * @note if nullptr, bn_gamma is set to 1.0 + * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. + * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. 
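+ *
+ * @note The fusion follows the usual batch-normalization folding, applied per output channel:
+ *       fused_weights = input_weights * bn_gamma / sqrt(bn_var + epsilon)
+ *       fused_bias    = (input_bias - bn_mean) * bn_gamma / sqrt(bn_var + epsilon) + bn_beta
+ *       where bn_beta is treated as 0 and bn_gamma as 1 when the corresponding tensors are nullptr.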
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var, + const ITensorInfo *fused_weights, const ITensorInfo *fused_bias, + const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr, + float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input_weights; + const ICLTensor *_input_bias; + const ICLTensor *_bn_mean; + const ICLTensor *_bn_var; + const ICLTensor *_bn_gamma; + const ICLTensor *_bn_beta; + ICLTensor *_fused_weights; + ICLTensor *_fused_bias; + float _epsilon; + bool _run_in_place_weights; + bool _run_in_place_bias; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H */ diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp index af7755b4e4..1f89865908 100644 --- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h new file mode 100644 index 0000000000..125f0c6948 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H +#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to multiply matrices with QASYMM8/QASYMM8_SIGNED data type */ +class CLGEMMLowpMatrixMultiplyNativeKernel : public ICLKernel +{ +public: + /** Default Constructor */ + CLGEMMLowpMatrixMultiplyNativeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMLowpMatrixMultiplyNativeKernel(const CLGEMMLowpMatrixMultiplyNativeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMLowpMatrixMultiplyNativeKernel &operator=(const CLGEMMLowpMatrixMultiplyNativeKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMLowpMatrixMultiplyNativeKernel(CLGEMMLowpMatrixMultiplyNativeKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMLowpMatrixMultiplyNativeKernel &operator=(CLGEMMLowpMatrixMultiplyNativeKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 + * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same as lhs_info.k0 + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 + * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same as lhs_info.k0 + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, + const GEMMReshapeInfo &gemm_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyNativeKernel + * + * @param[in] input0 Input tensor info for the LHS matrix. 
Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] input1 Input tensor info for the RHS matrix. Data type supported: same as @p input0 + * @param[in] output Output tensor info. Data type supported: S32 + * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same as lhs_info.k0 + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, + const GEMMReshapeInfo &gemm_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + ICLTensor *_output; + bool _slide_matrix_b; + bool _reinterpret_input_as_3d; + bool _reinterpret_output_as_3d; + bool _use_dummy_work_items; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H*/ diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp index 713d822f9b..ded4b29ae7 100644 --- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h new file mode 100644 index 0000000000..100100b1b1 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H +#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to multiply matrices when both the input matrices LHS (input0) and RHS (input1) have been reshaped + * + * @note The input matrices @p input0 and @p input1 must be reshaped through @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel + */ +class CLGEMMLowpMatrixMultiplyReshapedKernel : public ICLKernel +{ +public: + /** Default Constructor */ + CLGEMMLowpMatrixMultiplyReshapedKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMLowpMatrixMultiplyReshapedKernel(const CLGEMMLowpMatrixMultiplyReshapedKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMLowpMatrixMultiplyReshapedKernel &operator=(const CLGEMMLowpMatrixMultiplyReshapedKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMLowpMatrixMultiplyReshapedKernel(CLGEMMLowpMatrixMultiplyReshapedKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMLowpMatrixMultiplyReshapedKernel &operator=(CLGEMMLowpMatrixMultiplyReshapedKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4. + * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 + * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * lhs_info.transpose: false + * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same as lhs_info.k0 + * rhs_info.transpose: true + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + * + * @note lhs_info.k0 must be equal to rhs_info.k0 + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4. + * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 + * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. 
Only the following values are supported: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * lhs_info.transpose: false + * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same as lhs_info.k0 + * rhs_info.transpose: true + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + * + * @note lhs_info.k0 must be equal to rhs_info.k0 + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, + const GEMMReshapeInfo &gemm_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyReshapedKernel + * + * @param[in] input0 Input tensor info containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4. + * @param[in] input1 Input tensor info containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. + * @param[in] output Output tensor info. Data type supported: S32 + * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * lhs_info.transpose: false + * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: 2,3,4,8,16 + * rhs_info.transpose: true + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + * + * @note lhs_info.k0 must be equal to rhs_info.k0 + * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, + const GEMMReshapeInfo &gemm_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + ICLTensor *_output; + bool _slide_matrix_b; + bool _reinterpret_output_as_3d; + unsigned int _k; + bool _use_dummy_work_items; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H*/ diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp index 33fb903813..95aa30d14a 100644 --- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" @@ -195,11 +195,11 @@ std::pair validate_and_configure_window(ITensorInfo *input0, ITe { ARM_COMPUTE_UNUSED(vector_sum_row, vector_sum_col, output_multipliers, bias, output_shifts); - const GEMMLowpOutputStageInfo output_stage = gemm_info.output_stage; - unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0]; - unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1]; - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d; - bool reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d != 0); + const GEMMLowpOutputStageInfo output_stage = gemm_info.output_stage; + unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0]; + unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1]; + bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d; + bool reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d != 0); Window win{}; Window win_out{}; @@ -297,7 +297,7 @@ void CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::configure(const CLCompileCon output_multipliers != nullptr ? output_multipliers->info() : nullptr, output_shifts != nullptr ? output_shifts->info() : nullptr)); - auto padding_info = get_padding_info({ input0, input1, output, vector_sum_col, vector_sum_row, bias, output_multipliers, output_shifts }); + auto padding_info = get_padding_info({ input0, input1, output, vector_sum_col, vector_sum_row, bias, output_multipliers, output_shifts }); const GEMMRHSMatrixInfo rhs_info = gemm_info.rhs_info; const GEMMLHSMatrixInfo lhs_info = gemm_info.lhs_info; const GEMMLowpOutputStageInfo output_stage = gemm_info.output_stage; @@ -349,7 +349,7 @@ void CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::configure(const CLCompileCon // we will dispatch a batched-GEMM to reduce the complexity of the address calculation within the OpenCL kernel. // This means that the actual m used by the kernel is given by output->info()->dimension(1) and not by gemm_info.m const unsigned int internal_m = _reinterpret_output_as_3d ? gemm_info.m : output->info()->dimension(1); - // Calculate partial (store instead of load) M0 and partial N0 for the partial blocks at the end of a row/column if any. This is to avoid padding. + // Calculate partial (store instead of load) M0 and partial N0 for the partial blocks at the end of a row/column if any. This is to avoid padding. const unsigned int partial_store_m0 = internal_m % lhs_info.m0; const unsigned int partial_store_n0 = gemm_info.n % rhs_info.n0; diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h new file mode 100644 index 0000000000..222a8615e4 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H +#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H + +#include "arm_compute/core/KernelDescriptors.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to multiply matrices with QASYMM8 data type when only the input matrix RHS (input1) has been reshaped + * + * @note The input matrix input1 must be reshaped through @ref CLGEMMReshapeRHSMatrixKernel + * @note For fused output stage, only GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT type is supported + */ +class CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel : public ICLKernel +{ +public: + /** Default Constructor */ + CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel(const CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &operator=(const CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &operator=(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL + * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/S32. + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info. 
+ * Only the following values are supported for LHS info: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * Only the following values are supported for RHS info: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same as lhs_info.k0 + * rhs_info.transpose: true + * @param[in] vector_sum_col (Optional) Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32 + * @param[in] vector_sum_row (Optional) Input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32 + * @param[in] bias (Optional) Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32. + * @param[in] output_multipliers (Optional) Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32. + * @param[in] output_shifts (Optional) Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32. + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info, const ICLTensor *vector_sum_col = nullptr, + const ICLTensor *vector_sum_row = nullptr, const ICLTensor *bias = nullptr, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0 + * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/S32. + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info. + * Only the following values are supported for LHS info: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * Only the following values are supported for RHS info: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same as lhs_info.k0 + * rhs_info.transpose: true + * @param[in] vector_sum_col (Optional) Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32 + * @param[in] vector_sum_row (Optional) Input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32 + * @param[in] bias (Optional) Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32. + * @param[in] output_multipliers (Optional) Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32. + * @param[in] output_shifts (Optional) Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32. 
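+ * @note Illustrative configuration sketch only: the block sizes below are one choice within the supported ranges listed above; kernel is an instance of this class, lhs, rhs_reshaped and dst are assumed to be CL tensors initialised elsewhere, and GEMMKernelInfo is assumed to be default-constructible with the fields referenced in this file.
+ * @code
+ * GEMMKernelInfo gemm_info{};
+ * gemm_info.m = 64;
+ * gemm_info.n = 64;
+ * gemm_info.k = 32;
+ * gemm_info.lhs_info.m0 = 4;
+ * gemm_info.lhs_info.k0 = 4;
+ * gemm_info.rhs_info.n0 = 4;
+ * gemm_info.rhs_info.k0 = 4; // must match lhs_info.k0
+ * gemm_info.rhs_info.transpose = true;
+ * kernel.configure(compile_context, &lhs, &rhs_reshaped, &dst, gemm_info);
+ * @endcode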
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info, const ICLTensor *vector_sum_col = nullptr, + const ICLTensor *vector_sum_row = nullptr, const ICLTensor *bias = nullptr, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel + * + * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] input1 Input tensor info for the RHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL + * @param[in] output Output tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/S32. + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info. + * Only the following values are supported for LHS info: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * Only the following values are supported for RHS info: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same as lhs_info.k0 + * rhs_info.transpose: true + * @param[in] vector_sum_col (Optional) Input row-vector info of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32 + * @param[in] vector_sum_row (Optional) Input row-vector info of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32 + * @param[in] bias (Optional) Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32. + * @param[in] output_multipliers (Optional) Output multipliers tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32. + * @param[in] output_shifts (Optional) Output shifts tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32. 
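+ * @note A possible validate-then-configure sketch (illustrative only; assumes the usual Status/ErrorCode API and tensors initialised elsewhere):
+ * @code
+ * const Status status = CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::validate(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info);
+ * if(status.error_code() == ErrorCode::OK)
+ * {
+ *     kernel.configure(compile_context, &lhs, &rhs_reshaped, &dst, gemm_info);
+ * }
+ * @endcode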
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, const GEMMKernelInfo &gemm_info, const ITensorInfo *vector_sum_col = nullptr, + const ITensorInfo *vector_sum_row = nullptr, const ITensorInfo *bias = nullptr, const ITensorInfo *output_multipliers = nullptr, + const ITensorInfo *output_shifts = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + ICLTensor *_output; + const ICLTensor *_vector_sum_col; + const ICLTensor *_vector_sum_row; + const ICLTensor *_bias; + const ICLTensor *_output_multipliers; + const ICLTensor *_output_shifts; + bool _slide_matrix_b; + bool _reinterpret_input_as_3d; + bool _reinterpret_output_as_3d; + bool _use_dummy_work_items; + bool _is_quantized_per_channel; + bool _fuse_output_stage; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H */ \ No newline at end of file diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp index aa4eea60ca..c7844b9c28 100644 --- a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h new file mode 100644 index 0000000000..f8705595a0 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H +#define ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel used to add the offset contribution after the matrix multiplication. 
The computation is performed in-place + * + * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), + * and adds to it the offset contribution of matrix A and matrix B in-place. + * + * The final result is: + * + * mm_result[i][k] = mm_result[i][k] + + * (vector_sum_col[k] * a_offset) + + * (vector_sum_row[i] * b_offset) + + * (a_offset * b_offset * k) + * + */ +class CLGEMMLowpOffsetContributionKernel : public ICLKernel +{ +public: + /** Constructor */ + CLGEMMLowpOffsetContributionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + CLGEMMLowpOffsetContributionKernel(const CLGEMMLowpOffsetContributionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + CLGEMMLowpOffsetContributionKernel &operator=(const CLGEMMLowpOffsetContributionKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMLowpOffsetContributionKernel(CLGEMMLowpOffsetContributionKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMLowpOffsetContributionKernel &operator=(CLGEMMLowpOffsetContributionKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in, out] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32 + * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result + * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] k Number of matrix A columns or Matrix B rows + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + */ + void configure(ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset, int32_t b_offset); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32 + * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result + * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] k Number of matrix A columns or Matrix B rows + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. 
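+ * @note Illustrative numeric example of the in-place update described above: with a_offset = 2, b_offset = 3, k = 4, vector_sum_col[k] = 10 and vector_sum_row[i] = 20,
+ *       the value added to mm_result[i][k] is (10 * 2) + (20 * 3) + (2 * 3 * 4) = 104.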
+ */ + void configure(const CLCompileContext &compile_context, ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset, + int32_t b_offset); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel + * + * @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpOffsetContributionKernel. Data type supported: S32 + * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result + * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + * + * @return a status + */ + static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, int32_t a_offset, int32_t b_offset); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_vector_sum_col; + const ICLTensor *_vector_sum_row; + ICLTensor *_mm_result; + const ICLTensor *_bias; +}; +} // namespace arm_compute + +#endif /* ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H */ diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp index afa7bdbfdf..b41d8704bd 100644 --- a/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h new file mode 100644 index 0000000000..15f54d17a5 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H +#define ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel used to add the offset contribution after the matrix multiplication and perform the output stage. + * + * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), adds to it the offset contribution + * of matrix A and matrix B and performs the output stage defined by the output_stage argument + * + * @note For quantized computations the output data type for auto-initialization must be passed as part of the @ref GEMMLowpOutputStageInfo. + */ +class CLGEMMLowpOffsetContributionOutputStageKernel : public ICLKernel +{ +public: + /** Constructor */ + CLGEMMLowpOffsetContributionOutputStageKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + CLGEMMLowpOffsetContributionOutputStageKernel(const CLGEMMLowpOffsetContributionOutputStageKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + CLGEMMLowpOffsetContributionOutputStageKernel &operator=(const CLGEMMLowpOffsetContributionOutputStageKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMLowpOffsetContributionOutputStageKernel(CLGEMMLowpOffsetContributionOutputStageKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMLowpOffsetContributionOutputStageKernel &operator=(CLGEMMLowpOffsetContributionOutputStageKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32 + * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result + * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED. + * @param[in] k Number of matrix A columns or Matrix B rows + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + * @param[in] output_stage GEMMLowp output stage info + * @param[in] output_multipliers Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32 + * @param[in] output_shifts Output shifts tensor. 
In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32 + */ + void configure(const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output, int32_t k, int32_t a_offset, int32_t b_offset, + const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32 + * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result + * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED. + * @param[in] k Number of matrix A columns or Matrix B rows + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + * @param[in] output_stage GEMMLowp output stage info + * @param[in] output_multipliers Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32 + * @param[in] output_shifts Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32 + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output, + int32_t k, + int32_t a_offset, int32_t b_offset, + const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel + * + * @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpOffsetContributionKernel. Data type supported: S32 + * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result + * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED. + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. 
+ * @param[in] output_stage GEMMLowp output stage info + * @param[in] output_multipliers Output multipliers tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32 + * @param[in] output_shifts Output shifts tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). + * Supported data types: S32 + * + * @return a status + */ + static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, const ITensorInfo *output, int32_t a_offset, + int32_t b_offset, const GEMMLowpOutputStageInfo &output_stage, const ITensorInfo *output_multipliers, const ITensorInfo *output_shifts); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_mm_result; + const ICLTensor *_vector_sum_col; + const ICLTensor *_vector_sum_row; + const ICLTensor *_bias; + ICLTensor *_output; + const ICLTensor *_output_multipliers; + const ICLTensor *_output_shifts; + bool _is_quantized_per_channel; +}; +} // namespace arm_compute + +#endif /* ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H */ diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp index ab1b5a2203..d0f016879e 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h new file mode 100644 index 0000000000..8653102cd8 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFIXEDPOINTKERNEL_H +#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFIXEDPOINTKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED/QSYMM16 + * + * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final quantized value. + * The following computations will be performed by the kernel: + * + * -# Compute fixed point multiplication of each entry of the input by gemmlowp_multiplier + * -# Add bias to final result if bias tensor is not a nullptr + * -# Round to nearest division by a power-of-two using result_shift + * -# Add offset to each result + * -# Clamp the value between the specified min and max bounds + * -# Clamp the resulting int32 values to the proper quantized range and cast to QASYMM8/QASYMM8_SIGNED/QSYMM16. + */ +class CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel : public ICLKernel +{ +public: + /** Constructor */ + CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel(const CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &operator=(const CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel(CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &operator=(CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM16. + * @param[in] info Output stage info. Used to pass the quantized output data type + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM16. + * @param[in] info Output stage info.
Used to pass the quantized output data type + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_bias; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFIXEDPOINTKERNEL_H */ diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp index ad5bac015b..1d29dfe4b3 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h new file mode 100644 index 0000000000..0a8d5e1942 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H +#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ICLTensor; + +/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED + * + * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value. 
+ * The following computations will be performed by the kernel: + * + * -# Compute fixed point multiplication of each entry of the input by result_fixedpoint_multiplier + * -# Add bias to final result if bias tensor is not a nullptr + * -# Requantize + * -# Add offset to each result + * -# Clamp the value between the specified min and max bounds + * -# Clamp the resulting int32 values: + * - to the [0..255] range and cast to QASYMM8. + * - to the [-128..127] range and cast to QASYMM8_SIGNED. + */ +class CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel : public ICLKernel +{ +public: + /** Constructor */ + CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel(const CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &operator=(const CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel(CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &operator=(CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] info Output stage info. Used to pass the quantized output data type + */ + void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] info Output stage info. Used to pass the quantized output data type + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] info Output stage info.
Used to pass the quantized output data type + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_bias; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H */ diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp index 8e4b291dbe..d32d328fc2 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h new file mode 100644 index 0000000000..abdf33ea43 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H +#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED + * + * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value. 
+ * The following computations will be performed by the kernel: + * + * -# Add offset terms to final result + * -# Multiply each entry of result by result_mult_int + * -# Add bias to final result if bias tensor is not a nullptr + * -# Shift the int32 accumulator by result_shift + * -# Clamp the value between the specified min and max bounds + * -# Clamp the resulting int32 values: + * - to the [0..255] range and cast to QASYMM8. + * - to the [-128..127] range and cast to QASYMM8_SIGNED. + * + */ +class CLGEMMLowpQuantizeDownInt32ScaleKernel : public ICLKernel +{ +public: + /** Constructor */ + CLGEMMLowpQuantizeDownInt32ScaleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + CLGEMMLowpQuantizeDownInt32ScaleKernel(const CLGEMMLowpQuantizeDownInt32ScaleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + CLGEMMLowpQuantizeDownInt32ScaleKernel &operator=(const CLGEMMLowpQuantizeDownInt32ScaleKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMLowpQuantizeDownInt32ScaleKernel(CLGEMMLowpQuantizeDownInt32ScaleKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMLowpQuantizeDownInt32ScaleKernel &operator=(CLGEMMLowpQuantizeDownInt32ScaleKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] output_stage GEMMLowp output stage metadata. + */ + void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] output_stage GEMMLowp output stage metadata. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleKernel + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] output_stage GEMMLowp output stage metadata.
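+ * @note Worked example of the computation steps listed in the class description (illustrative values only): with offset = 2, result_mult_int = 3, result_shift = 2,
+ *       a bias value of 4 and an accumulator value of 130, the kernel produces (((130 + 2) * 3) + 4) >> 2 = 100, which is then clamped to the configured bounds
+ *       and cast to the output data type.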
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_bias; + ICLTensor *_output; +}; +} // namespace arm_compute + +#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H */ \ No newline at end of file diff --git a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp index 339049ff9a..d508bf6f21 100644 --- a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLGEMMLowpReductionKernel.h b/src/core/CL/kernels/CLGEMMLowpReductionKernel.h new file mode 100644 index 0000000000..237d8099b7 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMLowpReductionKernel.h @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H +#define ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; +struct GEMMLowpReductionKernelInfo; + +/** Common interface for all OpenCL reduction kernels */ +class ICLGEMMLowpReductionKernel : public ICLKernel +{ +public: + /** Constructor */ + ICLGEMMLowpReductionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + ICLGEMMLowpReductionKernel(const ICLGEMMLowpReductionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + ICLGEMMLowpReductionKernel &operator=(const ICLGEMMLowpReductionKernel &) = delete; + /** Allow instances of this class to be moved */ + ICLGEMMLowpReductionKernel(ICLGEMMLowpReductionKernel &&) = default; + /** Allow instances of this class to be moved */ + ICLGEMMLowpReductionKernel &operator=(ICLGEMMLowpReductionKernel &&) = default; + + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8. + * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k Number of matrix columns/rows depending on the type of reduction. + * - is_reshaped True if the matrix has been reshaped. + * - scalar Scalar value to multiply each reduced column/row by. + * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. + */ + virtual void configure(const ICLTensor *input, ICLTensor *output, const GEMMLowpReductionKernelInfo &info) = 0; + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8. + * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k Number of matrix columns/rows depending on the type of reduction. + * - is_reshaped True if the matrix has been reshaped. + * - scalar Scalar value to multiply each reduced column/row by. + * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. + */ + virtual void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLowpReductionKernelInfo &info) = 0; + +protected: + const ICLTensor *_input; + ICLTensor *_output; +}; + +/** OpenCL kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A. + * + * @note This stage is needed to handle the offset of matrix product + * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md + */ +class CLGEMMLowpMatrixAReductionKernel : public ICLGEMMLowpReductionKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8. + * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k Number of matrix columns/rows depending on the type of reduction. + * - is_reshaped True if the matrix has been reshaped. + * - scalar Scalar value to multiply each reduced column/row by. + * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. 
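+ * @note For illustration, the reduction implied by the parameters above is vector_sum_row[i] = scalar * (mtx_a[i][0] + mtx_a[i][1] + ... + mtx_a[i][k-1])
+ *       when mul_byscalar is true, and the plain row sum otherwise.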
+ */ + void configure(const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override; + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8. + * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k Number of matrix columns/rows depending on the type of reduction. + * - is_reshaped True if the matrix has been reshaped. + * - scalar Scalar value to multiply each reduced column/row by. + * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override; + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixAReductionKernel + * + * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8. + * @param[in] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k Number of matrix columns/rows depending on the type of reduction. + * - is_reshaped True if the matrix has been reshaped. + * - scalar Scalar value to multiply each reduced column/row by. + * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. + * + * @return a status + */ + static Status validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row, const GEMMLowpReductionKernelInfo &info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; + +/** OpenCL kernel used to compute the row-vectors of sums of all the entries in each column of Matrix B. + * + * @note This stage is needed to handle the offset of matrix product + * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md + */ +class CLGEMMLowpMatrixBReductionKernel : public ICLGEMMLowpReductionKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] mtx_b Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL. + * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k Number of matrix columns/rows depending on the type of reduction. + * - is_reshaped True if the matrix has been reshaped. + * - scalar Scalar value to multiply each reduced column/row by. + * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. + */ + void configure(const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override; + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] mtx_b Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL. + * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k Number of matrix columns/rows depending on the type of reduction. + * - is_reshaped True if the matrix has been reshaped.
+ * - scalar Scalar value to multiply each reduced column/row by. + * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override; + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixBReductionKernel + * + * @param[in] mtx_b Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL. + * @param[in] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k Number of matrix columns/rows depending on the type of reduction. + * - is_reshaped True if the matrix has been reshaped. + * - scalar Scalar value to multiply each reduced column/row by. + * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. + * + * @return a status + */ + static Status validate(const ITensorInfo *mtx_b, const ITensorInfo *vector_sum_col, const GEMMLowpReductionKernelInfo &info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H */ diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp index fd0978230d..b0d08a756c 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h new file mode 100644 index 0000000000..71d223b8ac --- /dev/null +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H +#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to multiply two input matrices "A" and "B" and add a matrix "C" if provided. All elements of the output matrix will be multiplied by alpha. In case matrix C is passed, it will be added to the previous result. + * For the matrix C, the broadcast addition is supported if the flag "broadcast_bias" is set in the GEMMReshapeInfo object + * + * @note If the input tensors @p input0 and @p input1 have been reshaped respectively with @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel, + * the flag @p is_interleaved_transposed must be set to true + * + * @attention @p input1 tensor must have at least 2 dimensions (matrix) + * + */ +class CLGEMMMatrixMultiplyKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLGEMMMatrixMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixMultiplyKernel(const CLGEMMMatrixMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixMultiplyKernel &operator=(const CLGEMMMatrixMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMMatrixMultiplyKernel(CLGEMMMatrixMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMMatrixMultiplyKernel &operator=(CLGEMMMatrixMultiplyKernel &&) = default; + /** Initialise the kernel's input, output and alpha + * + * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F16/F32 + * @param[in] input1 Input tensor containing the Matrix B. Data type supported: same as @p input0 + * @param[in] input2 Input tensor containing the Matrix C (bias). Can be nullptr. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta (Optional) Weight of vector C. Default value is 0. Only beta = 1 is currently supported. + * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel + * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped + * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy + * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication + * + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f, + bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo()); + /** Initialise the kernel's input, output and alpha + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F16/F32 + * @param[in] input1 Input tensor containing the Matrix B.
Data type supported: same as @p input0 + * @param[in] input2 Input tensor containing the Matrix C (bias). Can be nullptr. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta (Optional) Weight of vector C. Default value is 0. Only beta = 1 is currently supported. + * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel + * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped + * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy + * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication + * + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f, + bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyKernel + * + * @param[in] input0 Input tensor containing the Matrix A info. Data types supported: F16/F32 + * @param[in] input1 Input tensor containing the Matrix B info. Data type supported: same as @p input0 + * @param[in] input2 Input tensor containing the Matrix C (bias) info. Can be nullptr. Data type supported: same as @p input0 + * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of vector C. Default value is 0. Only beta = 1 is currently supported. + * @param[in] is_interleaved_transposed True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel + * @param[in] reshape_info GEMM reshape info. 
If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped + * @param[in] gpu_target GPU Target + * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy + * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication + * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, + bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info, GPUTarget gpu_target, bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +public: + const ICLTensor *_input0; + const ICLTensor *_input1; + const ICLTensor *_input2; + ICLTensor *_output; + bool _slide_matrix_b; + bool _reinterpret_input_as_3d; + bool _reinterpret_output_as_3d; + bool _add_bias; + bool _broadcast_bias; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H */ diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp index cea147b10c..f613937f54 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h new file mode 100644 index 0000000000..6b6004b464 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
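For reference, a minimal configuration sketch for CLGEMMMatrixMultiplyKernel in the non-reshaped case (is_interleaved_transposed = false). This is illustrative only and not part of the patch: the shapes are hypothetical, the assumed includes are arm_compute/runtime/CL/CLTensor.h, arm_compute/runtime/CL/CLScheduler.h and the kernel header introduced above, and in practice this kernel is normally driven through the CLGEMM runtime function rather than configured directly.

    // Sketch: dst = alpha * lhs * rhs, no bias, inputs in their original (non-reshaped) layout.
    using namespace arm_compute;
    CLScheduler::get().default_init(); // create the CL context and queue once per application

    const int M = 64, N = 64, K = 64;
    CLTensor lhs, rhs, dst;
    lhs.allocator()->init(TensorInfo(TensorShape(K, M), 1, DataType::F32)); // A stored as [width, height] = [K, M]
    rhs.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::F32)); // B stored as [N, K]
    dst.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32)); // C stored as [N, M]
    lhs.allocator()->allocate();
    rhs.allocator()->allocate();
    dst.allocator()->allocate();

    CLGEMMMatrixMultiplyKernel mm_kernel;
    mm_kernel.configure(&lhs, &rhs, nullptr /* no bias */, &dst, 1.0f /* alpha */, 0.0f /* beta */,
                        false /* is_interleaved_transposed */, GEMMReshapeInfo(M, N, K));
    CLScheduler::get().enqueue(mm_kernel); // fill lhs/rhs before enqueueing in real code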
+ */ +#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H +#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include "arm_compute/core/KernelDescriptors.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to multiply matrices when neither of the input matrices have been reshaped */ +class CLGEMMMatrixMultiplyNativeKernel : public ICLKernel +{ +public: + /** Default Constructor */ + CLGEMMMatrixMultiplyNativeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixMultiplyNativeKernel(const CLGEMMMatrixMultiplyNativeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixMultiplyNativeKernel &operator=(const CLGEMMMatrixMultiplyNativeKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMMatrixMultiplyNativeKernel(CLGEMMMatrixMultiplyNativeKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMMatrixMultiplyNativeKernel &operator=(CLGEMMMatrixMultiplyNativeKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input0 Input tensor for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4. + * @param[in] input1 Input tensor for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. + * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. + * @param[out] output Output tensor info. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of the matrix bias + * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. Only the following values are supported: + * lhs_info.m0: 1,2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same of lhs_info.k0 + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMKernelInfo &gemm_info); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 Input tensor for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4. + * @param[in] input1 Input tensor for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. + * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. + * @param[out] output Output tensor info. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of the matrix bias + * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. 
Only the following values are supported: + * lhs_info.m0: 1,2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same of lhs_info.k0 + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, + const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMKernelInfo &gemm_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyNativeKernel + * + * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4. + * @param[in] input1 Input tensor info for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. + * @param[in] input2 Input tensor info containing the bias matrix. Data type supported: same as @p input0. + * @param[in] output Output tensor info. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of the matrix bias + * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. Only the following values are supported: + * lhs_info.m0: 1,2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.k0: same of lhs_info.k0 + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMKernelInfo &gemm_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + const ICLTensor *_input2; + ICLTensor *_output; + bool _slide_matrix_b; + bool _reinterpret_input_as_3d; + bool _reinterpret_output_as_3d; + bool _use_dummy_work_items; + bool _add_bias; + bool _broadcast_bias; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H*/ diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp index eaf57086a3..fb15b42fe2 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
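A configuration sketch for CLGEMMMatrixMultiplyNativeKernel, using block sizes from the supported ranges listed above. Illustrative only: lhs, rhs and dst are assumed to be allocated F32 CLTensors of shape [K, M], [N, K] and [N, M] as in the earlier sketch, and the GEMMKernelInfo field names are assumed from arm_compute/core/KernelDescriptors.h.

    // Sketch: native GEMM (neither input reshaped) with a 4x4x4 blocking.
    using namespace arm_compute;
    const unsigned int M = 64, N = 64, K = 64;

    GEMMLHSMatrixInfo lhs_info;
    lhs_info.m0 = 4;           // rows processed per work-item (supported: 1..8)
    lhs_info.k0 = 4;           // accumulations per iteration (supported: 2,3,4,8,16)

    GEMMRHSMatrixInfo rhs_info;
    rhs_info.n0 = 4;           // columns processed per work-item (supported: 2,3,4,8,16)
    rhs_info.k0 = lhs_info.k0; // must be the same as lhs_info.k0

    GEMMKernelInfo gemm_info;
    gemm_info.m = M;
    gemm_info.n = N;
    gemm_info.k = K;           // original (unblocked) GEMM dimensions

    CLGEMMMatrixMultiplyNativeKernel gemm_native;
    gemm_native.configure(&lhs, &rhs, nullptr /* no bias */, &dst, 1.0f /* alpha */, 0.0f /* beta */,
                          lhs_info, rhs_info, gemm_info);
    CLScheduler::get().enqueue(gemm_native);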
*/ -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h new file mode 100644 index 0000000000..2ffc322def --- /dev/null +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H +#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include "arm_compute/core/KernelDescriptors.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to multiply matrices when both the input matrices LHS (input0) and RHS (input1) have been reshaped + * + * @note The input matrices @p input0 and @p input1 must be reshaped through @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel + */ +class CLGEMMMatrixMultiplyReshapedKernel : public ICLKernel +{ +public: + /** Default Constructor */ + CLGEMMMatrixMultiplyReshapedKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixMultiplyReshapedKernel(const CLGEMMMatrixMultiplyReshapedKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixMultiplyReshapedKernel &operator=(const CLGEMMMatrixMultiplyReshapedKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMMatrixMultiplyReshapedKernel(CLGEMMMatrixMultiplyReshapedKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMMatrixMultiplyReshapedKernel &operator=(CLGEMMMatrixMultiplyReshapedKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag. + * Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the + * multiplications. i.e. float c = (half)a * (half)b + * + * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function. 
+ * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer, + * the following conditions are required: + * -# rhs_info.n0 can only be 4, 8 and 16 + * -# rhs_info.k0 can only be 4, 8 and 16 + * -# Data type can only be F32 + * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension + * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement + * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) + * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT + * + * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). The number of dimensions for the LHS matrix must be less or equal than 4 + * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3 + * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of the matrix bias + * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * lhs_info.transpose: false + * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true) + * rhs_info.k0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true) + * rhs_info.transpose: true + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + * + * @note lhs_info.k0 must be equal to rhs_info.k0 + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMKernelInfo &gemm_info); + /** Initialise the kernel's input and output. + * + * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag. + * Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the + * multiplications. i.e. float c = (half)a * (half)b + * + * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function. + * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer, + * the following conditions are required: + * -# rhs_info.n0 can only be 4, 8 and 16 + * -# rhs_info.k0 can only be 4, 8 and 16 + * -# Data type can only be F32 + * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension + * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement + * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) + * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT + * + * @param[in] compile_context The compile context to be used. 
+ * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). The number of dimensions for the LHS matrix must be less or equal than 4 + * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3 + * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of the matrix bias + * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * lhs_info.transpose: false + * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true) + * rhs_info.k0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true) + * rhs_info.transpose: true + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + * + * @note lhs_info.k0 must be equal to rhs_info.k0 + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, + const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMKernelInfo &gemm_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedKernel + * + * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag. + * Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the + * multiplications. i.e. float c = (half)a * (half)b + * + * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function. + * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer, + * the following conditions are required: + * -# rhs_info.n0 can only be 4, 8 and 16 + * -# rhs_info.k0 can only be 4, 8 and 16 + * -# Data type can only be F32 + * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension + * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement + * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) + * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT + * + * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). The number of dimensions for the LHS matrix must be less or equal than 4 + * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3 + * @param[in] input2 Input tensor info containing the bias matrix. Data type supported: same as @p input0. + * @param[in] output Output tensor to store the result of matrix multiplication. 
Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of the matrix bias + * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * lhs_info.transpose: false + * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true) + * rhs_info.k0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true) + * rhs_info.transpose: true + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + * + * @note lhs_info.k0 must be equal to rhs_info.k0 + * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMKernelInfo &gemm_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + const ICLTensor *_input2; + ICLTensor *_output; + bool _slide_matrix_b; + bool _reinterpret_output_as_3d; + bool _use_dummy_work_items; + bool _add_bias; + bool _broadcast_bias; + bool _export_to_cl_image; + unsigned int _k; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H*/ \ No newline at end of file diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp index d53aede3c8..1f296f8e26 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" @@ -356,10 +356,10 @@ void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::run(const Window &window, cl::Co ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0); } - const size_t lhs_idx_batch_size = _reinterpret_input_as_3d && !_has_pad_y? 3u : 2u; + const size_t lhs_idx_batch_size = _reinterpret_input_as_3d && !_has_pad_y ? 3u : 2u; const size_t rhs_idx_batch_size = 2u; const size_t bia_idx_batch_size = 2u; - const size_t out_idx_batch_size = _reinterpret_output_as_3d && !_has_pad_y? 3u : 2u; + const size_t out_idx_batch_size = _reinterpret_output_as_3d && !_has_pad_y ? 3u : 2u; Window slice = window.first_slice_window_3D(); Window slice_matrix_b = slice; diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h new file mode 100644 index 0000000000..5b96679a46 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
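A sketch of info objects that satisfy the constraints listed above for CLGEMMMatrixMultiplyReshapedKernel, checked through validate(). Illustrative only: lhs_reshaped and rhs_reshaped are assumed to be CLTensors holding the outputs of CLGEMMReshapeLHSMatrixKernel and CLGEMMReshapeRHSMatrixKernel, dst the destination, and M, N, K the original GEMM dimensions.

    // Sketch: block configuration for the reshaped-LHS x reshaped-RHS multiplication.
    using namespace arm_compute;

    GEMMLHSMatrixInfo lhs_info;
    lhs_info.m0        = 4;           // supported: 2..8
    lhs_info.k0        = 4;           // supported: 2,3,4,8,16
    lhs_info.v0        = 4;           // block interleave factor used when the LHS was reshaped
    lhs_info.transpose = false;       // this kernel requires a non-transposed LHS reshape

    GEMMRHSMatrixInfo rhs_info;
    rhs_info.n0        = 4;           // supported: 2,3,4,8,16
    rhs_info.k0        = lhs_info.k0; // must be equal to lhs_info.k0
    rhs_info.h0        = 4;           // block interleave factor used when the RHS was reshaped
    rhs_info.transpose = true;        // this kernel requires a transposed RHS reshape

    GEMMKernelInfo gemm_info;
    gemm_info.m = M; gemm_info.n = N; gemm_info.k = K;

    const Status status = CLGEMMMatrixMultiplyReshapedKernel::validate(
        lhs_reshaped.info(), rhs_reshaped.info(), nullptr, dst.info(),
        1.0f /* alpha */, 0.0f /* beta */, lhs_info, rhs_info, gemm_info);
    // status.error_code() == ErrorCode::OK when the block sizes and reshaped shapes are consistent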
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H +#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include "arm_compute/core/KernelDescriptors.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to multiply matrices when only the input matrix RHS (input1) has been reshaped + * + * @note The input matrix input1 must be reshaped through @ref CLGEMMReshapeRHSMatrixKernel + */ +class CLGEMMMatrixMultiplyReshapedOnlyRHSKernel : public ICLKernel +{ +public: + /** Default Constructor */ + CLGEMMMatrixMultiplyReshapedOnlyRHSKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixMultiplyReshapedOnlyRHSKernel(const CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &operator=(const CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMMatrixMultiplyReshapedOnlyRHSKernel(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &operator=(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function. + * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer, + * the following conditions are required: + * -# rhs_info.n0 can only be 4, 8 and 16 + * -# rhs_info.k0 can only be 4, 8 and 16 + * -# Data type can only be F32 + * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension + * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement + * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) + * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT + * + * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). + * The number of dimensions for the LHS matrix must be less or equal than 4. 
+ * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. + * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of the matrix bias + * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported: + * lhs_info.m0: 1,2,3,4,5,6,7,8 + * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: + * rhs_info.k0: 2,3,4,8,16 + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.transpose: true,false + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMKernelInfo &gemm_info); + /** Initialise the kernel's input and output. + * + * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function. + * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer, + * the following conditions are required: + * -# rhs_info.n0 can only be 4, 8 and 16 + * -# rhs_info.k0 can only be 4, 8 and 16 + * -# Data type can only be F32 + * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension + * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement + * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) + * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). + * The number of dimensions for the LHS matrix must be less or equal than 4. + * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. + * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of the matrix bias + * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported: + * lhs_info.m0: 1,2,3,4,5,6,7,8 + * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. 
Only the following values are supported: + * rhs_info.k0: 2,3,4,8,16 + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.transpose: true,false + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, + const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMKernelInfo &gemm_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel + * + * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function. + * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer, + * the following conditions are required: + * -# rhs_info.n0 can only be 4, 8 and 16 + * -# rhs_info.k0 can only be 4, 8 and 16 + * -# Data type can only be F32 + * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension + * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement + * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) + * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT + * + * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). + * The number of dimensions for the LHS matrix must be less or equal than 4. + * @param[in] input1 Input tensor info for the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. + * @param[in] input2 Input tensor info containing the bias matrix. Data type supported: same as @p input0. + * @param[in] output Output tensor info. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of the matrix bias + * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported: + * lhs_info.m0: 1,2,3,4,5,6,7,8 + * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. 
Only the following values are supported: + * rhs_info.k0: 2,3,4,8,16 + * rhs_info.n0: 2,3,4,8,16 + * rhs_info.transpose: true,false + * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices + * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, + const GEMMKernelInfo &gemm_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + const ICLTensor *_input2; + ICLTensor *_output; + bool _slide_matrix_b; + bool _reinterpret_input_as_3d; + bool _reinterpret_output_as_3d; + bool _use_dummy_work_items; + bool _add_bias; + bool _broadcast_bias; + bool _export_to_cl_image; + bool _has_pad_y; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H*/ diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp index 04aa061e98..ee0abc56d3 100644 --- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h new file mode 100644 index 0000000000..bef8c231ac --- /dev/null +++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H +#define ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the GEMM matrix vector multiply kernel. 
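A matching sketch for CLGEMMMatrixMultiplyReshapedOnlyRHSKernel, documented above. Illustrative only: lhs is the unreshaped LHS, rhs_reshaped the output of CLGEMMReshapeRHSMatrixKernel and dst the destination, all assumed to be allocated CLTensors; export_to_cl_image is left at its default.

    // Sketch: only the RHS is reshaped; for the LHS only the row block size lhs_info.m0 matters.
    using namespace arm_compute;
    const unsigned int M = 64, N = 64, K = 64; // hypothetical original GEMM dimensions

    GEMMLHSMatrixInfo lhs_info;
    lhs_info.m0 = 4;            // rows per work-item (supported: 1..8)

    GEMMRHSMatrixInfo rhs_info;
    rhs_info.n0        = 4;     // supported: 2,3,4,8,16
    rhs_info.k0        = 4;     // supported: 2,3,4,8,16
    rhs_info.h0        = 4;     // must match the value used when the RHS was reshaped
    rhs_info.transpose = true;

    GEMMKernelInfo gemm_info;
    gemm_info.m = M; gemm_info.n = N; gemm_info.k = K;

    CLGEMMMatrixMultiplyReshapedOnlyRHSKernel gemm_rhs_only;
    gemm_rhs_only.configure(&lhs, &rhs_reshaped, nullptr /* no bias */, &dst, 1.0f /* alpha */, 0.0f /* beta */,
                            lhs_info, rhs_info, gemm_info);
    CLScheduler::get().enqueue(gemm_rhs_only);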
**/ +class CLGEMMMatrixVectorMultiplyKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLGEMMMatrixVectorMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixVectorMultiplyKernel(const CLGEMMMatrixVectorMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixVectorMultiplyKernel &operator=(const CLGEMMMatrixVectorMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMMatrixVectorMultiplyKernel(CLGEMMMatrixVectorMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMMatrixVectorMultiplyKernel &operator=(CLGEMMMatrixVectorMultiplyKernel &&) = default; + /** Set the input and output of the kernel. + * + * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input. + * @param[out] output The output 2D tensor. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED. + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); + /** Set the input and output of the kernel. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input. + * @param[out] output The output 2D tensor. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixVectorMultiplyKernel + * + * @param[in] input0 The reshaped input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in] input1 The 2D reshaped weights tensor info. Data type supported: Same as @p input. + * @param[in] output The output 2D tensor info. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED. + * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + ICLTensor *_output; + int _num_rows_read_per_iteration; + BorderSize _border_size; +}; +} // arm_compute +#endif /*ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H */ diff --git a/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp b/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp index f2ad677976..3e2fc79704 100644 --- a/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp +++ b/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h b/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h new file mode 100644 index 0000000000..92202a26fc --- /dev/null +++ b/src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H +#define ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to reshape the LHS matrix when performing the matrix multiplication. + * In particular, this function splits the input matrix in blocks of size M0xK0 (defined through GEMMLHSInfo) and + * stores each one in the output matrix unrolling the values + */ +class CLGEMMReshapeLHSMatrixKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLGEMMReshapeLHSMatrixKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMReshapeLHSMatrixKernel(const CLGEMMReshapeLHSMatrixKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMReshapeLHSMatrixKernel &operator=(const CLGEMMReshapeLHSMatrixKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMReshapeLHSMatrixKernel(CLGEMMReshapeLHSMatrixKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMReshapeLHSMatrixKernel &operator=(CLGEMMReshapeLHSMatrixKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor. Data type supported: same as @p input + * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary + * information to reshape the input tensor. 
Only the following values are supported: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * lhs_info.v0: greater than 0 + * lhs_info.transpose: true, false + * lhs_info.interleave: true, false + * @param[in] reinterpret_input_as_3d (Optional) True if the input has to be reinterpreted as 3D tensor + */ + void configure(const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor. Data type supported: same as @p input + * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary + * information to reshape the input tensor. Only the following values are supported: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * lhs_info.v0: greater than 0 + * lhs_info.transpose: true, false + * lhs_info.interleave: true, false + * @param[in] reinterpret_input_as_3d (Optional) True if the input has to be reinterpreted as 3D tensor + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMReshapeLHSMatrixKernel + * + * @param[in] input Input tensor info. Data types supported: All + * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input. + * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary + * information to reshape the input tensor. Only the following values are supported: + * lhs_info.m0: 2,3,4,5,6,7,8 + * lhs_info.k0: 2,3,4,8,16 + * lhs_info.v0: greater than 0 + * lhs_info.transpose: true, false + * lhs_info.interleave: true, false + * @param[in] reinterpret_input_as_3d True if the input has to be reinterpreted as 3D tensor + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d); + + // Inherited methods overridden + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + bool _reinterpret_input_as_3d; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H */ \ No newline at end of file diff --git a/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp index d94e834d2c..33de61ed01 100644 --- a/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp +++ b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h new file mode 100644 index 0000000000..911484ea76 --- /dev/null +++ b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
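For the CLGEMMReshapeLHSMatrixKernel above, a short configuration sketch. Illustrative only: the helper compute_lhs_reshaped_shape from arm_compute/core/utils/misc/ShapeCalculator.h is assumed to be available for sizing the intermediate tensor, and the block sizes are chosen from the supported ranges listed above.

    // Sketch: reshape an F32 LHS matrix into M0xK0 blocks ahead of a reshaped GEMM.
    using namespace arm_compute;
    using namespace arm_compute::misc::shape_calculator;
    const int M = 64, K = 64;

    CLTensor lhs, lhs_reshaped;
    lhs.allocator()->init(TensorInfo(TensorShape(K, M), 1, DataType::F32));

    GEMMLHSMatrixInfo lhs_info;
    lhs_info.m0         = 4;
    lhs_info.k0         = 4;
    lhs_info.v0         = 4;     // number of blocks interleaved vertically, must be greater than 0
    lhs_info.transpose  = false;
    lhs_info.interleave = true;

    lhs_reshaped.allocator()->init(TensorInfo(compute_lhs_reshaped_shape(*lhs.info(), lhs_info), 1, DataType::F32));
    lhs.allocator()->allocate();
    lhs_reshaped.allocator()->allocate();

    CLGEMMReshapeLHSMatrixKernel reshape_lhs;
    reshape_lhs.configure(&lhs, &lhs_reshaped, lhs_info, false /* reinterpret_input_as_3d */);
    CLScheduler::get().enqueue(reshape_lhs);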
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H +#define ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to reshape the RHS matrix when performing the matrix multiplication + * In particular, this kernel splits the input matrix in blocks of size K0xN0 and stores each one in + * the output matrix unrolling the values */ +class CLGEMMReshapeRHSMatrixKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLGEMMReshapeRHSMatrixKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMReshapeRHSMatrixKernel(const CLGEMMReshapeRHSMatrixKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMReshapeRHSMatrixKernel &operator=(const CLGEMMReshapeRHSMatrixKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMReshapeRHSMatrixKernel(CLGEMMReshapeRHSMatrixKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMReshapeRHSMatrixKernel &operator=(CLGEMMReshapeRHSMatrixKernel &&) = default; + /** Default destructor */ + ~CLGEMMReshapeRHSMatrixKernel() = default; + /** Initialise the kernel's input and output. + * + * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will guarantee the OpenCL pitch alignment for the output tensor, + * required to create a OpenCL image object from buffer in @ref CLGEMMMatrixMultiplyReshapedKernel and in @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel + * Since the OpenCL image object is created importing the OpenCL buffer, the following conditions are required: + * -# rhs_info.n0 can only be 4, 8 and 16 + * -# rhs_info.k0 can only be 4, 8 and 16 + * -# Data type can only be F32, F16 + * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension + * -# output width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) + * -# output (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT + * -# The output tensor should be only consumed by @ref CLGEMMMatrixMultiplyReshapedKernel or @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel + * + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor. 
Data type supported: same as @p input + * @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary + * information to reshape the input tensor. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image == true) + * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false), (only 4, 8 and 16 if rhs_info.export_to_cl_image == true) + * rhs_info.h0: greater than 0 + * rhs_info.transpose: true, false + * rhs_info.interleave: true, false + */ + void configure(const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info); + /** Initialise the kernel's input and output. + * + * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will guarantee the OpenCL pitch alignment for the output tensor, + * required to create a OpenCL image object from buffer in @ref CLGEMMMatrixMultiplyReshapedKernel and in @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel + * Since the OpenCL image object is created importing the OpenCL buffer, the following conditions are required: + * -# rhs_info.n0 can only be 4, 8 and 16 + * -# rhs_info.k0 can only be 4, 8 and 16 + * -# Data type can only be F32, F16 + * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension + * -# output width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) + * -# output (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT + * -# The output tensor should be only consumed by @ref CLGEMMMatrixMultiplyReshapedKernel or @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor. Data type supported: same as @p input + * @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary + * information to reshape the input tensor. 
Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image == true) + * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false), (only 4, 8 and 16 if rhs_info.export_to_cl_image == true) + * rhs_info.h0: greater than 0 + * rhs_info.transpose: true, false + * rhs_info.interleave: true, false + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMReshapeRHSMatrixKernel + * + * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will guarantee the OpenCL pitch alignment for the output tensor, + * required to create a OpenCL image object from buffer in @ref CLGEMMMatrixMultiplyReshapedKernel and in @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel + * Since the OpenCL image object is created importing the OpenCL buffer, the following conditions are required: + * -# rhs_info.n0 can only be 4, 8 and 16 + * -# rhs_info.k0 can only be 4, 8 and 16 + * -# Data type can only be F32, F16 + * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension + * -# output width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4) + * -# output (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT + * -# The output tensor should be only consumed by @ref CLGEMMMatrixMultiplyReshapedKernel or @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel + * + * @param[in] input Input tensor info. Data types supported: All + * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input. + * @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary + * information to reshape the input tensor. Only the following values are supported: + * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image == true) + * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false),(only 4, 8 and 16 if rhs_info.export_to_cl_image == true) + * rhs_info.h0: greater than 0 + * rhs_info.transpose: true, false + * rhs_info.interleave: true, false + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const GEMMRHSMatrixInfo &rhs_info); + + // Inherited methods overridden + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H */ \ No newline at end of file diff --git a/src/core/CL/kernels/CLGatherKernel.cpp b/src/core/CL/kernels/CLGatherKernel.cpp index a8508bed2d..9e802c20fb 100644 --- a/src/core/CL/kernels/CLGatherKernel.cpp +++ b/src/core/CL/kernels/CLGatherKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGatherKernel.h" +#include "src/core/CL/kernels/CLGatherKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/helpers/AutoConfiguration.h" diff --git a/src/core/CL/kernels/CLGatherKernel.h b/src/core/CL/kernels/CLGatherKernel.h new file mode 100644 index 0000000000..8f472a4696 --- /dev/null +++ b/src/core/CL/kernels/CLGatherKernel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
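The matching sketch for CLGEMMReshapeRHSMatrixKernel above. Illustrative only: compute_rhs_reshaped_shape from ShapeCalculator.h is assumed for sizing the output, export_to_cl_image is left disabled, and the block sizes come from the supported ranges listed above.

    // Sketch: reshape an F32 RHS matrix into K0xN0 blocks ahead of a reshaped GEMM.
    using namespace arm_compute;
    using namespace arm_compute::misc::shape_calculator;
    const int N = 64, K = 64;

    CLTensor rhs, rhs_reshaped;
    rhs.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::F32));

    GEMMRHSMatrixInfo rhs_info;
    rhs_info.n0         = 4;
    rhs_info.k0         = 4;
    rhs_info.h0         = 4;    // number of blocks interleaved horizontally, must be greater than 0
    rhs_info.transpose  = true;
    rhs_info.interleave = true;

    rhs_reshaped.allocator()->init(TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, DataType::F32));
    rhs.allocator()->allocate();
    rhs_reshaped.allocator()->allocate();

    CLGEMMReshapeRHSMatrixKernel reshape_rhs;
    reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info);
    CLScheduler::get().enqueue(reshape_rhs);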
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGATHERKERNEL_H +#define ARM_COMPUTE_CLGATHERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to perform tensor reshaping */ +class CLGatherKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLGatherKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGatherKernel(const CLGatherKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGatherKernel &operator=(const CLGatherKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGatherKernel(CLGatherKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGatherKernel &operator=(CLGatherKernel &&) = default; + /** Default destructor */ + ~CLGatherKernel() = default; + /** Initialise the kernel's inputs and outputs + * + * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All. + * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]) + * @param[out] output Destination tensor. Data type supported: Same as @p input + * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 + */ + void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0); + /** Initialise the kernel's inputs and outputs + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All. + * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]) + * @param[out] output Destination tensor. Data type supported: Same as @p input + * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. 
Defaults to 0 + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0); + + /** Static function to check if given info will lead to a valid configuration of @ref CLGatherKernel + * + * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All. + * @param[in] indices Indices tensor info. Supported tensor rank: up to 4. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]) + * @param[in] output Destination tensor info. Data type supported: Same as @p input + * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis = 0); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Source tensor */ + const ICLTensor *_indices; /**< Indices tensor */ + ICLTensor *_output; /**< Destination tensor */ + int _axis; /**< Axis index */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGATHERKERNEL_H */ diff --git a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp b/src/core/CL/kernels/CLGaussian3x3Kernel.cpp index c9ed1ac0d7..40e9658ab4 100644 --- a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp +++ b/src/core/CL/kernels/CLGaussian3x3Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h" +#include "src/core/CL/kernels/CLGaussian3x3Kernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLGaussian3x3Kernel.h b/src/core/CL/kernels/CLGaussian3x3Kernel.h new file mode 100644 index 0000000000..139b05d44c --- /dev/null +++ b/src/core/CL/kernels/CLGaussian3x3Kernel.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H +#define ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the Gaussian 3x3 filter kernel. 
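As an aside, the validate() entry point of CLGatherKernel can vet shapes before any OpenCL resources are created. A small sketch, with shapes and axis chosen purely for illustration:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLGatherKernel.h"

using namespace arm_compute;

// Gather 5 rows along axis 1 of a 16x32 matrix: the gathered axis is replaced
// by the number of indices, so the expected output shape is 16x5.
Status check_gather()
{
    const TensorInfo input(TensorShape(16U, 32U), 1, DataType::F32);
    const TensorInfo indices(TensorShape(5U), 1, DataType::U32);
    const TensorInfo output(TensorShape(16U, 5U), 1, DataType::F32);

    return CLGatherKernel::validate(&input, &indices, &output, /* axis */ 1);
}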
+ * + */ +class CLGaussian3x3Kernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H */ diff --git a/src/core/CL/kernels/CLGaussian5x5Kernel.cpp b/src/core/CL/kernels/CLGaussian5x5Kernel.cpp index 5b3639f025..46a7576154 100644 --- a/src/core/CL/kernels/CLGaussian5x5Kernel.cpp +++ b/src/core/CL/kernels/CLGaussian5x5Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h" +#include "src/core/CL/kernels/CLGaussian5x5Kernel.h" #include diff --git a/src/core/CL/kernels/CLGaussian5x5Kernel.h b/src/core/CL/kernels/CLGaussian5x5Kernel.h new file mode 100644 index 0000000000..711710b3b3 --- /dev/null +++ b/src/core/CL/kernels/CLGaussian5x5Kernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H +#define ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H + +#include "src/core/CL/kernels/CLConvolutionKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run the horizontal pass of 5x5 Gaussian filter on a tensor. */ +class CLGaussian5x5HorKernel : public CLSeparableConvolution5x5HorKernel +{ +public: + /** Initialise the kernel's source, destination and border. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: S16. 
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); + +private: + //Make the configure method of the parent class private + using CLSeparableConvolution5x5HorKernel::configure; +}; + +/** Interface for the kernel to run the vertical pass of 5x5 Gaussian filter on a tensor. */ +class CLGaussian5x5VertKernel : public CLSeparableConvolution5x5VertKernel +{ +public: + /** Initialise the kernel's source, destination and border. + * + * @param[in] input Input tensor(output of horizontal pass). Data types supported: S16. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor(output of horizontal pass). Data types supported: S16. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); + +private: + //Make the configure method of the parent class private + using CLSeparableConvolution5x5VertKernel::configure; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H */ diff --git a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp index 2686e8b32e..065f7f7e92 100644 --- a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp +++ b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h" +#include "src/core/CL/kernels/CLGaussianPyramidKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLGaussianPyramidKernel.h b/src/core/CL/kernels/CLGaussianPyramidKernel.h new file mode 100644 index 0000000000..a6595440f6 --- /dev/null +++ b/src/core/CL/kernels/CLGaussianPyramidKernel.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
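A short sketch of how the two 5x5 Gaussian passes are meant to be chained, assuming pre-allocated CL tensors of matching width and height (U8 source, S16 intermediate, U8 destination); the names are illustrative:

#include "arm_compute/core/CL/ICLTensor.h"
#include "src/core/CL/kernels/CLGaussian5x5Kernel.h"

using namespace arm_compute;

// The 5x5 Gaussian is separable: the horizontal kernel writes an S16 intermediate,
// the vertical kernel consumes it and produces the final U8 image.
void configure_gaussian5x5(CLGaussian5x5HorKernel &hor, CLGaussian5x5VertKernel &vert,
                           const ICLTensor &src, ICLTensor &tmp_s16, ICLTensor &dst)
{
    hor.configure(&src, &tmp_s16, /* border_undefined */ true);
    vert.configure(&tmp_s16, &dst, /* border_undefined */ true);
}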
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H +#define ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H + +#include "src/core/CL/ICLSimpleKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform a Gaussian filter and half scaling across width (horizontal pass) */ +class CLGaussianPyramidHorKernel : public ICLSimpleKernel +{ +public: + /** Default constructor */ + CLGaussianPyramidHorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramidHorKernel(const CLGaussianPyramidHorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramidHorKernel &operator=(const CLGaussianPyramidHorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGaussianPyramidHorKernel(CLGaussianPyramidHorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGaussianPyramidHorKernel &operator=(CLGaussianPyramidHorKernel &&) = default; + /** Default destructor */ + ~CLGaussianPyramidHorKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16. + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + int _l2_load_offset; +}; + +/** OpenCL kernel to perform a Gaussian filter and half scaling across height (vertical pass) */ +class CLGaussianPyramidVertKernel : public ICLSimpleKernel +{ +public: + /** Default constructor */ + CLGaussianPyramidVertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramidVertKernel(const CLGaussianPyramidVertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramidVertKernel &operator=(const CLGaussianPyramidVertKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGaussianPyramidVertKernel(CLGaussianPyramidVertKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGaussianPyramidVertKernel &operator=(CLGaussianPyramidVertKernel &&) = default; + /** Default destructor */ + ~CLGaussianPyramidVertKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data types supported: U16. + * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8. + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U16. + * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + int _t2_load_offset; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H */ diff --git a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp index a2fcbbab78..dd3faf50a2 100644 --- a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp +++ b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h" +#include "src/core/CL/kernels/CLGenerateProposalsLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.h b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.h new file mode 100644 index 0000000000..d26795ac7d --- /dev/null +++ b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
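For orientation, one half-scale pyramid level is produced by chaining the two kernels above; the sketch below assumes pre-allocated tensors with the documented types and halved dimensions (names are illustrative):

#include "arm_compute/core/CL/ICLTensor.h"
#include "src/core/CL/kernels/CLGaussianPyramidKernel.h"

using namespace arm_compute;

// level_n:   U8 input of the current pyramid level
// tmp_u16:   U16 intermediate with half the input width
// level_np1: U8 output with half the input width and height
void configure_pyramid_level(CLGaussianPyramidHorKernel &hor, CLGaussianPyramidVertKernel &vert,
                             const ICLTensor &level_n, ICLTensor &tmp_u16, ICLTensor &level_np1)
{
    hor.configure(&level_n, &tmp_u16);    // horizontal pass: halves the width
    vert.configure(&tmp_u16, &level_np1); // vertical pass: halves the height
}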
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGENERATEPROPOSALSLAYERKERNEL_H +#define ARM_COMPUTE_CLGENERATEPROPOSALSLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" +namespace arm_compute +{ +class ICLTensor; + +/** Interface for Compute All Anchors kernel */ +class CLComputeAllAnchorsKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLComputeAllAnchorsKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLComputeAllAnchorsKernel(const CLComputeAllAnchorsKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLComputeAllAnchorsKernel &operator=(const CLComputeAllAnchorsKernel &) = delete; + /** Allow instances of this class to be moved */ + CLComputeAllAnchorsKernel(CLComputeAllAnchorsKernel &&) = default; + /** Allow instances of this class to be moved */ + CLComputeAllAnchorsKernel &operator=(CLComputeAllAnchorsKernel &&) = default; + /** Default destructor */ + ~CLComputeAllAnchorsKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 + * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input + * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo + * + */ + void configure(const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 + * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. 
Data types supported: Same as @p input
+ * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
+ *
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLComputeAllAnchorsKernel
+ *
+ * @param[in] anchors Source tensor info. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32
+ * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input
+ * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_anchors;
+ ICLTensor *_all_anchors;
+};
+} // namespace arm_compute
+#endif // ARM_COMPUTE_CLGENERATEPROPOSALSLAYERKERNEL_H
diff --git a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp b/src/core/CL/kernels/CLHOGDescriptorKernel.cpp index eaf5ea4880..cd3f1ee216 100644 --- a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp +++ b/src/core/CL/kernels/CLHOGDescriptorKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h" +#include "src/core/CL/kernels/CLHOGDescriptorKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLHOGDescriptorKernel.h b/src/core/CL/kernels/CLHOGDescriptorKernel.h new file mode 100644 index 0000000000..eee2fa36bc --- /dev/null +++ b/src/core/CL/kernels/CLHOGDescriptorKernel.h @@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
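A minimal validate() sketch for CLComputeAllAnchorsKernel; the anchor count, feature-map size, spatial scale and the ComputeAnchorsInfo constructor arguments shown here are illustrative assumptions only:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLGenerateProposalsLayerKernel.h"

using namespace arm_compute;

Status check_all_anchors()
{
    constexpr unsigned int num_anchors = 9;  // A
    constexpr unsigned int feat_w      = 38; // W
    constexpr unsigned int feat_h      = 38; // H

    const TensorInfo anchors(TensorShape(4U, num_anchors), 1, DataType::F32);
    const TensorInfo all_anchors(TensorShape(4U, feat_w * feat_h * num_anchors), 1, DataType::F32);

    const ComputeAnchorsInfo info(feat_w, feat_h, /* spatial_scale */ 1.f / 16.f);
    return CLComputeAllAnchorsKernel::validate(&anchors, &all_anchors, info);
}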
+ */ +#ifndef ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H +#define ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H + +#include "arm_compute/core/IHOG.h" +#include "arm_compute/core/Size2D.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** OpenCL kernel to perform HOG Orientation Binning */ +class CLHOGOrientationBinningKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLHOGOrientationBinningKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGOrientationBinningKernel(const CLHOGOrientationBinningKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGOrientationBinningKernel &operator=(const CLHOGOrientationBinningKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHOGOrientationBinningKernel(CLHOGOrientationBinningKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHOGOrientationBinningKernel &operator=(CLHOGOrientationBinningKernel &&) = default; + /** Default destructor */ + ~CLHOGOrientationBinningKernel() = default; + + /** Initialise the kernel's inputs, output and HOG's metadata + * + * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16. + * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8 + * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. Number of channels supported: equal to the number of histogram bins per cell + * @param[in] hog_info HOG's metadata + */ + void configure(const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info); + /** Initialise the kernel's inputs, output and HOG's metadata + * + * @param[in] compile_context The compile context to be used. + * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16. + * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8 + * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. 
Number of channels supported: equal to the number of histogram bins per cell + * @param[in] hog_info HOG's metadata + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input_magnitude; + const ICLTensor *_input_phase; + ICLTensor *_output; + Size2D _cell_size; +}; + +/** OpenCL kernel to perform HOG block normalization */ +class CLHOGBlockNormalizationKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLHOGBlockNormalizationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGBlockNormalizationKernel(const CLHOGBlockNormalizationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGBlockNormalizationKernel &operator=(const CLHOGBlockNormalizationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHOGBlockNormalizationKernel(CLHOGBlockNormalizationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHOGBlockNormalizationKernel &operator=(CLHOGBlockNormalizationKernel &&) = default; + /** Default destructor */ + ~CLHOGBlockNormalizationKernel() = default; + + /** Initialise the kernel's input, output and HOG's metadata + * + * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell + * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block + * @param[in] hog_info HOG's metadata + */ + void configure(const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info); + /** Initialise the kernel's input, output and HOG's metadata + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell + * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block + * @param[in] hog_info HOG's metadata + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + Size2D _num_cells_per_block_stride; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H */ diff --git a/src/core/CL/kernels/CLHOGDetectorKernel.cpp b/src/core/CL/kernels/CLHOGDetectorKernel.cpp index 6e14996732..861155b9a2 100644 --- a/src/core/CL/kernels/CLHOGDetectorKernel.cpp +++ b/src/core/CL/kernels/CLHOGDetectorKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
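A sketch of how the two HOG descriptor kernels are chained, assuming the gradient magnitude/phase tensors, the per-cell and per-block outputs and the HOGInfo metadata already exist (all names illustrative):

#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/HOGInfo.h"
#include "src/core/CL/kernels/CLHOGDescriptorKernel.h"

using namespace arm_compute;

// mag (S16) and phase (U8) come from a gradient stage; cell_hog and blocks are F32
// tensors whose channel counts match the number of histogram bins per cell/block.
void configure_hog_descriptor(CLHOGOrientationBinningKernel &binning, CLHOGBlockNormalizationKernel &block_norm,
                              const ICLTensor &mag, const ICLTensor &phase,
                              ICLTensor &cell_hog, ICLTensor &blocks, const HOGInfo &hog)
{
    binning.configure(&mag, &phase, &cell_hog, &hog); // per-cell orientation histograms
    block_norm.configure(&cell_hog, &blocks, &hog);   // normalised block descriptors
}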
*/ -#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h" +#include "src/core/CL/kernels/CLHOGDetectorKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLHOGDetectorKernel.h b/src/core/CL/kernels/CLHOGDetectorKernel.h new file mode 100644 index 0000000000..c28e6ebe74 --- /dev/null +++ b/src/core/CL/kernels/CLHOGDetectorKernel.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLHOGDETECTORKERNEL_H +#define ARM_COMPUTE_CLHOGDETECTORKERNEL_H + +#include "arm_compute/core/CL/ICLArray.h" +#include "arm_compute/core/CL/ICLHOG.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "src/core/CL/ICLKernel.h" + +namespace cl +{ +class Buffer; +} + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform HOG detector kernel using linear SVM */ +class CLHOGDetectorKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLHOGDetectorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGDetectorKernel(const CLHOGDetectorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGDetectorKernel &operator=(const CLHOGDetectorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHOGDetectorKernel(CLHOGDetectorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHOGDetectorKernel &operator=(CLHOGDetectorKernel &&) = default; + /** Default destructor */ + ~CLHOGDetectorKernel() = default; + + /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect + * + * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block + * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel + * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects + * @param[in] num_detection_windows Number of detected objects + * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. 
+ * It must be a multiple of the hog->info()->block_stride()
+ * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
+ * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
+ */
+ void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f,
+ uint16_t idx_class = 0);
+ /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
+ * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel
+ * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects
+ * @param[in] num_detection_windows Number of detected objects
+ * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
+ * It must be a multiple of the hog->info()->block_stride()
+ * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
+ * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows,
+ const Size2D &detection_window_stride, float threshold = 0.0f,
+ uint16_t idx_class = 0);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLDetectionWindowArray *_detection_windows;
+ cl::Buffer *_num_detection_windows;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLHOGDETECTORKERNEL_H */
diff --git a/src/core/CL/kernels/CLHarrisCornersKernel.cpp b/src/core/CL/kernels/CLHarrisCornersKernel.cpp index 19c4e579a0..cbc056fb77 100644 --- a/src/core/CL/kernels/CLHarrisCornersKernel.cpp +++ b/src/core/CL/kernels/CLHarrisCornersKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h" +#include "src/core/CL/kernels/CLHarrisCornersKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLHarrisCornersKernel.h b/src/core/CL/kernels/CLHarrisCornersKernel.h new file mode 100644 index 0000000000..6482b0aa4e --- /dev/null +++ b/src/core/CL/kernels/CLHarrisCornersKernel.h @@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLHARRISCORNERSKERNEL_H +#define ARM_COMPUTE_CLHARRISCORNERSKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the harris score kernel. + * + * @note The implementation supports 3, 5, and 7 for the block_size. + */ +class CLHarrisScoreKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLHarrisScoreKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHarrisScoreKernel(const CLHarrisScoreKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHarrisScoreKernel &operator=(const CLHarrisScoreKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHarrisScoreKernel(CLHarrisScoreKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHarrisScoreKernel &operator=(CLHarrisScoreKernel &&) = default; + /** Default destructor */ + ~CLHarrisScoreKernel() = default; + + /** Setup the kernel parameters + * + * @param[in] input1 Source image (gradient X). Data types supported S16, S32. (Must be the same as input2) + * @param[in] input2 Source image (gradient Y). Data types supported S16, S32. (Must be the same as input1) + * @param[out] output Destination image (harris score). Data types supported F32 + * @param[in] block_size The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7 + * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0) + * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). + * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLImage *input1, const ICLImage *input2, ICLImage *output, + int32_t block_size, float norm_factor, float strength_thresh, float sensitivity, + bool border_undefined); + /** Setup the kernel parameters + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 Source image (gradient X). Data types supported S16, S32. (Must be the same as input2) + * @param[in] input2 Source image (gradient Y). 
Data types supported S16, S32. (Must be the same as input1) + * @param[out] output Destination image (harris score). Data types supported F32 + * @param[in] block_size The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7 + * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0) + * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). + * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLImage *input1, const ICLImage *input2, ICLImage *output, + int32_t block_size, float norm_factor, float strength_thresh, float sensitivity, + bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +protected: + const ICLImage *_input1; /**< Source image - Gx component */ + const ICLImage *_input2; /**< Source image - Gy component */ + ICLImage *_output; /**< Source image - Harris score */ + float _sensitivity; /**< Sensitivity value */ + float _strength_thresh; /**< Threshold value */ + float _norm_factor; /**< Normalization factor */ + BorderSize _border_size; /**< Border size */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLHARRISCORNERSKERNEL_H */ diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp index 3f5e91e5a1..8aa7366d50 100644 --- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h" +#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.h b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.h new file mode 100644 index 0000000000..f4cb627052 --- /dev/null +++ b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
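A configuration sketch for CLHarrisScoreKernel under the documented contract: gx/gy are the S16 gradients, score is the F32 output image, and every numeric parameter below is an illustrative choice rather than a recommended setting:

#include "arm_compute/core/CL/ICLTensor.h"
#include "src/core/CL/kernels/CLHarrisCornersKernel.h"

using namespace arm_compute;

void configure_harris_score(CLHarrisScoreKernel &harris,
                            const ICLTensor &gx, const ICLTensor &gy, ICLTensor &score)
{
    const int32_t block_size  = 3;                      // supported: 3, 5 or 7
    const float   norm_factor = 1.0f / (255.0f * 8.0f); // depends on the gradient operator used

    harris.configure(&gx, &gy, &score,
                     block_size,
                     norm_factor,
                     /* strength_thresh  */ 1e-5f,
                     /* sensitivity      */ 0.04f,
                     /* border_undefined */ true);
}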
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H +#define ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +/** Interface for the height concatenate kernel. + * The input tensor will be concatenated into the output tensor. + */ +class CLHeightConcatenateLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLHeightConcatenateLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHeightConcatenateLayerKernel(const CLHeightConcatenateLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHeightConcatenateLayerKernel &operator=(const CLHeightConcatenateLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHeightConcatenateLayerKernel(CLHeightConcatenateLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHeightConcatenateLayerKernel &operator=(CLHeightConcatenateLayerKernel &&) = default; + /** Default destructor */ + ~CLHeightConcatenateLayerKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[in] height_offset The starting offset on the Y axis for the output tensor. + * @param[out] output Output tensor. Data types supported: Same as @p input. + * + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int height_offset, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLHeightConcatenateLayerKernel + * + * @param[in] input Input tensor info. Data types supported: All. + * @param[in] height_offset The starting offset on the Y axis for the output tensor. + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; + +private: + unsigned int _height_offset; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLHistogramKernel.cpp b/src/core/CL/kernels/CLHistogramKernel.cpp index a85429c1a0..ca5322aa51 100644 --- a/src/core/CL/kernels/CLHistogramKernel.cpp +++ b/src/core/CL/kernels/CLHistogramKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLHistogramKernel.h" +#include "src/core/CL/kernels/CLHistogramKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLDistribution1D.h" diff --git a/src/core/CL/kernels/CLHistogramKernel.h b/src/core/CL/kernels/CLHistogramKernel.h new file mode 100644 index 0000000000..9c97c6590d --- /dev/null +++ b/src/core/CL/kernels/CLHistogramKernel.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
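Since CLHeightConcatenateLayerKernel is driven by ITensorInfo, a quick shape check can be done without any CL context; the shapes and offset below are illustrative:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h"

using namespace arm_compute;

// Place an 8x4 block into an 8x10 destination starting at row 4.
Status check_height_concat()
{
    const TensorInfo input(TensorShape(8U, 4U), 1, DataType::F32);
    const TensorInfo output(TensorShape(8U, 10U), 1, DataType::F32);

    return CLHeightConcatenateLayerKernel::validate(&input, /* height_offset */ 4U, &output);
}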
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLHISTOGRAMKERNEL_H +#define ARM_COMPUTE_CLHISTOGRAMKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLDistribution1D; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface to run the histogram kernel. This kernel processes the part of image with width can be divided by 16. + * If the image width is not a multiple of 16, remaining pixels have to be processed with the @ref CLHistogramBorderKernel + */ +class CLHistogramKernel : public ICLKernel +{ +public: + /** Constructor */ + CLHistogramKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHistogramKernel(const CLHistogramKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHistogramKernel &operator=(const CLHistogramKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHistogramKernel(CLHistogramKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHistogramKernel &operator=(CLHistogramKernel &&) = default; + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source image. Data types supported: U8. + * @param[out] output Destination distribution. + */ + void configure(const ICLImage *input, ICLDistribution1D *output); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source image. Data types supported: U8. + * @param[out] output Destination distribution. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLImage *_input; + ICLDistribution1D *_output; +}; + +/** Interface to run the histogram kernel to handle the leftover part of image + * + */ +class CLHistogramBorderKernel : public ICLKernel +{ +public: + /** Constructor */ + CLHistogramBorderKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHistogramBorderKernel(const CLHistogramBorderKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHistogramBorderKernel &operator=(const CLHistogramBorderKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHistogramBorderKernel(CLHistogramBorderKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHistogramBorderKernel &operator=(CLHistogramBorderKernel &&) = default; + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source image. Data types supported: U8. + * @param[out] output Destination distribution. + */ + void configure(const ICLImage *input, ICLDistribution1D *output); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source image. Data types supported: U8. + * @param[out] output Destination distribution. + */ + void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLImage *_input; + ICLDistribution1D *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLHISTOGRAMKERNEL_H*/ diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 76490f82f6..0789cdc8a7 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h" +#include "src/core/CL/kernels/CLIm2ColKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLIm2ColKernel.h b/src/core/CL/kernels/CLIm2ColKernel.h new file mode 100644 index 0000000000..2920c7d138 --- /dev/null +++ b/src/core/CL/kernels/CLIm2ColKernel.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLIM2COLKERNEL_H +#define ARM_COMPUTE_CLIM2COLKERNEL_H + +#include "arm_compute/core/Size2D.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the im2col reshape kernel. + * + * Rearranges image blocks into columns. It is used to strip out each convolution block to a single column. + * It is used to transform a convolution to a plain matrix multiplication. + * + * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have: + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * = + * \left( \begin{array}{ccccccccc} + * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\ + * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\ + * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\ + * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\ + * \end{array} \right) + * @f] + */ +class CLIm2ColKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLIm2ColKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLIm2ColKernel(const CLIm2ColKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLIm2ColKernel &operator=(const CLIm2ColKernel &) = delete; + /** Allow instances of this class to be moved */ + CLIm2ColKernel(CLIm2ColKernel &&) = default; + /** Allow instances of this class to be moved */ + CLIm2ColKernel &operator=(CLIm2ColKernel &&) = default; + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, + * while every dimension above represents a batch. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias In case biases are provided expands the matrix with 1. + * This is valid only for non-quantized inputs. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. + * Number of groups other than 1 is only supported for NCHW data layout. + * Number of groups should be multiple to the number of channels. + */ + void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U), + unsigned int num_groups = 1); + /** Set the input and output of the kernel. 
+ * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, + * while every dimension above represents a batch. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, + const Size2D &dilation = Size2D(1U, 1U), + unsigned int num_groups = 1); + /** Static function to check if given info will lead to a valid configuration of @ref CLIm2ColKernel + * + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, + * while every dimension above represents a batch. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias In case biases are provided expands the matrix with 1. + * This is valid only for non-quantized inputs. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. + * Number of groups other than 1 is only supported for NCHW data layout. + * Number of groups should be multiple to the number of channels. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U), + unsigned int num_groups = 1); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +public: + const ICLTensor *_input; + ICLTensor *_output; + DataLayout _data_layout; + std::pair<unsigned int, unsigned int> _convolved_dims; + unsigned int _num_elems_processed_per_iteration; + Size2D _kernel_dims; + PadStrideInfo _conv_info; + unsigned int _num_groups; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLIM2COLKERNEL_H */ diff --git a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp index e97b856456..4c3b404be7 100644 --- a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
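// Editorial note, not part of the patch: a hedged sketch of the usual validate()-then-configure()
// pattern for CLIm2ColKernel documented above; the tensors, 3x3 kernel size and stride values are
// illustrative assumptions, not taken from the patch.
void configure_im2col_sketch(const ICLTensor &src, ICLTensor &dst, CLIm2ColKernel &im2col)
{
    const Size2D        kernel_dims(3, 3);
    const PadStrideInfo conv_info(1, 1, 0, 0); // stride 1, no padding
    // validate() lets callers reject unsupported setups before configure() asserts on them.
    const Status status = CLIm2ColKernel::validate(src.info(), dst.info(), kernel_dims, conv_info, /* has_bias = */ false);
    if(bool(status))
    {
        im2col.configure(&src, &dst, kernel_dims, conv_info, /* has_bias = */ false);
    }
}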
*/ -#include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h" +#include "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h new file mode 100644 index 0000000000..d4444f0b20 --- /dev/null +++ b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H +#define ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include "arm_compute/core/KernelDescriptors.h" + +namespace arm_compute +{ +// Forward declarations +class ICLTensor; + +/** Interface for performing an instance normalization */ +class CLInstanceNormalizationLayerKernel : public ICLKernel +{ +public: + /** Constructor */ + CLInstanceNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLInstanceNormalizationLayerKernel(const CLInstanceNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLInstanceNormalizationLayerKernel &operator=(const CLInstanceNormalizationLayerKernel &) = delete; + /** Default Move Constructor. */ + CLInstanceNormalizationLayerKernel(CLInstanceNormalizationLayerKernel &&) = default; + /** Default move assignment operator */ + CLInstanceNormalizationLayerKernel &operator=(CLInstanceNormalizationLayerKernel &&) = default; + /** Default destructor */ + ~CLInstanceNormalizationLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC + * In case of @p output tensor = nullptr this tensor will store the result of the normalization. + * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. + * @param[in] info Kernel meta-data descriptor + */ + void configure(ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. 
Data types supported: F16/F32. Data layout supported: NCHW, NHWC + * In case of @p output tensor = nullptr this tensor will store the result of the normalization. + * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. + * @param[in] info Kernel meta-data descriptor + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info); + + /** Static function to check if given info will lead to a valid configuration of @ref CLInstanceNormalizationLayer. + * + * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported: NHWC, NCHW + * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. + * @param[in] info Kernel meta-data descriptor + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_input; + ICLTensor *_output; + bool _run_in_place; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLIntegralImageKernel.cpp b/src/core/CL/kernels/CLIntegralImageKernel.cpp index 82f6da85a5..5e5683d231 100644 --- a/src/core/CL/kernels/CLIntegralImageKernel.cpp +++ b/src/core/CL/kernels/CLIntegralImageKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h" +#include "src/core/CL/kernels/CLIntegralImageKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLIntegralImageKernel.h b/src/core/CL/kernels/CLIntegralImageKernel.h new file mode 100644 index 0000000000..0e40e3afbc --- /dev/null +++ b/src/core/CL/kernels/CLIntegralImageKernel.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
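// Editorial note, not part of the patch: a scalar illustration of what instance normalization
// computes for one H x W plane of an NCHW tensor, with gamma/beta/epsilon as carried by the
// kernel's meta-data descriptor (simplified; the OpenCL implementation vectorises this and
// also supports NHWC).
#include <cmath>
void instance_norm_plane_ref(const float *x, float *y, int hw, float gamma, float beta, float epsilon)
{
    float sum = 0.f, sum_sq = 0.f;
    for(int i = 0; i < hw; ++i)
    {
        sum += x[i];
        sum_sq += x[i] * x[i];
    }
    const float mean       = sum / hw;
    const float var        = sum_sq / hw - mean * mean;
    const float inv_stddev = 1.f / std::sqrt(var + epsilon);
    for(int i = 0; i < hw; ++i)
    {
        // y = gamma * (x - mean) / sqrt(var + epsilon) + beta
        y[i] = gamma * (x[i] - mean) * inv_stddev + beta;
    }
}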
+ */ +#ifndef ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H +#define ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H + +#include "src/core/CL/ICLKernel.h" +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface to run the horizontal pass of the integral image kernel. */ +class CLIntegralImageHorKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output Destination tensor, Data types supported: U32. + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output Destination tensor, Data types supported: U32. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); +}; + +/** Interface to run the vertical pass of the integral image kernel. */ +class CLIntegralImageVertKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLIntegralImageVertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLIntegralImageVertKernel(const CLIntegralImageVertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLIntegralImageVertKernel &operator=(const CLIntegralImageVertKernel &) = delete; + /** Allow instances of this class to be moved */ + CLIntegralImageVertKernel(CLIntegralImageVertKernel &&) = default; + /** Allow instances of this class to be moved */ + CLIntegralImageVertKernel &operator=(CLIntegralImageVertKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in,out] in_out The input/output tensor. Data types supported: U32 + */ + void configure(ICLTensor *in_out); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] in_out The input/output tensor. Data types supported: U32 + */ + void configure(const CLCompileContext &compile_context, ICLTensor *in_out); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_in_out; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H */ diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp index 9936e29c5f..9e91d98f7c 100644 --- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp +++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h" +#include "src/core/CL/kernels/CLL2NormalizeLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.h b/src/core/CL/kernels/CLL2NormalizeLayerKernel.h new file mode 100644 index 0000000000..edc0585217 --- /dev/null +++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
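// Editorial note, not part of the patch: the horizontal and vertical integral image kernels
// together compute a standard summed-area table; a scalar equivalent of the two passes
// (illustrative only, not the OpenCL implementation).
#include <cstdint>
void integral_image_ref(const uint8_t *in, uint32_t *out, int w, int h)
{
    // Pass 1 (horizontal kernel): per-row prefix sums, U8 -> U32.
    for(int y = 0; y < h; ++y)
    {
        uint32_t row_sum = 0;
        for(int x = 0; x < w; ++x)
        {
            row_sum += in[y * w + x];
            out[y * w + x] = row_sum;
        }
    }
    // Pass 2 (vertical kernel): in-place column-wise accumulation of the U32 buffer.
    for(int y = 1; y < h; ++y)
    {
        for(int x = 0; x < w; ++x)
        {
            out[y * w + x] += out[(y - 1) * w + x];
        }
    }
}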
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H +#define ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for performing a L2 normalize on a given axis given the square sum of it in this axis */ +class CLL2NormalizeLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLL2NormalizeLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLL2NormalizeLayerKernel(const CLL2NormalizeLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLL2NormalizeLayerKernel &operator=(const CLL2NormalizeLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLL2NormalizeLayerKernel(CLL2NormalizeLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLL2NormalizeLayerKernel &operator=(CLL2NormalizeLayerKernel &&) = default; + /** Default destructor */ + ~CLL2NormalizeLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[in] sum Sum values tensor. Data types supported: same as @p input. + * Sum will have the same number of dimensions as input. + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 + * @param[in] epsilon Lower bound value for the normalization. + */ + void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[in] sum Sum values tensor. Data types supported: same as @p input. + * Sum will have the same number of dimensions as input. + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. 
Negative values wrap around. Maximum supported actual reduction axis : 2 + * @param[in] epsilon Lower bound value for the normalization. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon); + + /** Static function to check if given info will lead to a valid configuration of @ref CLL2NormalizeLayerKernel. + * + * @param[in] input Source tensor info. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[in] sum Sum values tensor info. Data types supported: same as @p input. + * Sum will have the same number of dimensions as input. + * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input. + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 + * @param[in] epsilon Lower bound value for the normalization. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_sum; + ICLTensor *_output; + unsigned int _actual_axis; + float _epsilon; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLLKTrackerKernel.cpp b/src/core/CL/kernels/CLLKTrackerKernel.cpp index 0fa2e703ec..a439c2448e 100644 --- a/src/core/CL/kernels/CLLKTrackerKernel.cpp +++ b/src/core/CL/kernels/CLLKTrackerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h" +#include "src/core/CL/kernels/CLLKTrackerKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLArray.h" diff --git a/src/core/CL/kernels/CLLKTrackerKernel.h b/src/core/CL/kernels/CLLKTrackerKernel.h new file mode 100644 index 0000000000..2d2966854a --- /dev/null +++ b/src/core/CL/kernels/CLLKTrackerKernel.h @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
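// Editorial note, not part of the patch: given the externally supplied sum-of-squares tensor,
// the per-element L2 normalization reduces to multiplying by a clamped reciprocal square root.
// A scalar sketch along a single row of the reduction axis (illustrative only).
#include <algorithm>
#include <cmath>
void l2_normalize_row_ref(const float *in, float *out, int n, float sum_sq, float epsilon)
{
    // 'sum_sq' is the precomputed sum of squares for this row; epsilon keeps
    // all-zero rows from dividing by zero.
    const float norm = 1.f / std::sqrt(std::max(sum_sq, epsilon));
    for(int i = 0; i < n; ++i)
    {
        out[i] = in[i] * norm;
    }
}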
+ */ +#ifndef ARM_COMPUTE_CLLKTRACKERKERNEL_H +#define ARM_COMPUTE_CLLKTRACKERKERNEL_H + +#include "arm_compute/core/CL/ICLArray.h" +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +#include <cstddef> +#include <cstdint> + +namespace arm_compute +{ +class ICLTensor; + +/** Interface to run the initialization step of LKTracker */ +class CLLKTrackerInitKernel : public ICLKernel +{ +public: + /** Initialise the kernel input and output + * + * @param[in] old_points Pointer to the @ref ICLKeyPointArray storing old key points + * @param[in] new_points_estimates Pointer to the @ref ICLKeyPointArray storing new estimates key points + * @param[out] old_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint old points + * @param[out] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points + * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used + * @param[in] level The pyramid level + * @param[in] num_levels The number of pyramid levels + * @param[in] pyramid_scale Scale factor used for generating the pyramid + */ + void configure(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, + ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, + bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale); + /** Initialise the kernel input and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] old_points Pointer to the @ref ICLKeyPointArray storing old key points + * @param[in] new_points_estimates Pointer to the @ref ICLKeyPointArray storing new estimates key points + * @param[out] old_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint old points + * @param[out] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points + * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used + * @param[in] level The pyramid level + * @param[in] num_levels The number of pyramid levels + * @param[in] pyramid_scale Scale factor used for generating the pyramid + */ + void configure(const CLCompileContext &compile_context, const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, + ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, + bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; + +/** Interface to run the finalize step of LKTracker, where it truncates the coordinates stored in new_points array */ +class CLLKTrackerFinalizeKernel : public ICLKernel +{ +public: + /** Initialise the kernel input and output + * + * @param[in] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points + * @param[out] new_points Pointer to the @ref ICLKeyPointArray storing new key points + */ + void configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points); + /** Initialise the kernel input and output + * + * @param[in] compile_context The compile context to be used.
+ * @param[in] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points + * @param[out] new_points Pointer to the @ref ICLKeyPointArray storing new key points + */ + void configure(const CLCompileContext &compile_context, ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; + +/** Interface to run the first stage of LKTracker, where A11, A12, A22, min_eig, ival, ixval and iyval are computed */ +class CLLKTrackerStage0Kernel : public ICLKernel +{ +public: + /** Default constructor */ + CLLKTrackerStage0Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLKTrackerStage0Kernel(const CLLKTrackerStage0Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLKTrackerStage0Kernel &operator=(const CLLKTrackerStage0Kernel &) = delete; + /** Allow instances of this class to be moved */ + CLLKTrackerStage0Kernel(CLLKTrackerStage0Kernel &&) = default; + /** Allow instances of this class to be moved */ + CLLKTrackerStage0Kernel &operator=(CLLKTrackerStage0Kernel &&) = default; + /** Initialise the kernel input and output + * + * @param[in] old_input Pointer to the input old tensor. Data types supported: U8 + * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data types supported: S16 + * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. Data types supported: S16 + * @param[in] old_points_internal Pointer to the array of CLLKInternalKeypoint old points + * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint new points + * @param[out] coeff_table Pointer to the array holding the Spatial Gradient coefficients + * @param[out] old_ival Pointer to the array holding internal values + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] level The pyramid level + */ + void configure(const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy, + ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, + ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, + size_t window_dimension, size_t level); + /** Initialise the kernel input and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] old_input Pointer to the input old tensor. Data types supported: U8 + * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data types supported: S16 + * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. 
Data types supported: S16 + * @param[in] old_points_internal Pointer to the array of CLLKInternalKeypoint old points + * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint new points + * @param[out] coeff_table Pointer to the array holding the Spatial Gradient coefficients + * @param[out] old_ival Pointer to the array holding internal values + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] level The pyramid level + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy, + ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, + ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, + size_t window_dimension, size_t level); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_old_input; + const ICLTensor *_old_scharr_gx; + const ICLTensor *_old_scharr_gy; +}; + +/** Interface to run the second stage of LKTracker, where the motion vectors of the given points are computed */ +class CLLKTrackerStage1Kernel : public ICLKernel +{ +public: + /** Default constructor */ + CLLKTrackerStage1Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLKTrackerStage1Kernel(const CLLKTrackerStage1Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLKTrackerStage1Kernel &operator=(const CLLKTrackerStage1Kernel &) = delete; + /** Allow instances of this class to be moved */ + CLLKTrackerStage1Kernel(CLLKTrackerStage1Kernel &&) = default; + /** Allow instances of this class to be moved */ + CLLKTrackerStage1Kernel &operator=(CLLKTrackerStage1Kernel &&) = default; + /** Initialise the kernel input and output + * + * @param[in] new_input Pointer to the input new tensor. Data types supported: U8 + * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint for new points + * @param[in] coeff_table Pointer to the array holding the Spatial Gradient coefficients + * @param[in] old_ival Pointer to the array holding internal values + * @param[in] termination The criteria to terminate the search of each keypoint. + * @param[in] epsilon The error for terminating the algorithm + * @param[in] num_iterations The maximum number of iterations before terminating the algorithm + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] level The pyramid level + */ + void configure(const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, + Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level); + /** Initialise the kernel input and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] new_input Pointer to the input new tensor. Data types supported: U8 + * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint for new points + * @param[in] coeff_table Pointer to the array holding the Spatial Gradient coefficients + * @param[in] old_ival Pointer to the array holding internal values + * @param[in] termination The criteria to terminate the search of each keypoint. 
+ * @param[in] epsilon The error for terminating the algorithm + * @param[in] num_iterations The maximum number of iterations before terminating the algorithm + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] level The pyramid level + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, + Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_new_input; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLLKTRACKERKERNEL_H */ diff --git a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp index 6e4c45eab7..49e04c32c2 100644 --- a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h" +#include "src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h new file mode 100644 index 0000000000..5d0a22afa5 --- /dev/null +++ b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H +#define ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to multiply each row of first tensor with low 2 dimensions of second tensor. 
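// Editorial note, not part of the patch, referring back to the LK tracker stage kernels
// documented above: stage 0 accumulates the spatial-gradient matrix G = [A11 A12; A12 A22]
// over the tracking window, and stage 1 repeatedly solves G * d = b for the motion update d
// until it converges or the iteration limit is hit. A scalar sketch of one such update
// (bx/by are the image-difference terms; all names are illustrative, not the OpenCL code):
bool lk_update_sketch(float A11, float A12, float A22, float bx, float by,
                      float epsilon, float &x, float &y)
{
    const float det = A11 * A22 - A12 * A12;
    if(det == 0.f)
    {
        return false; // singular system: the keypoint cannot be tracked
    }
    // Cramer's rule for the 2x2 system G * d = b
    const float dx = (A22 * bx - A12 * by) / det;
    const float dy = (A11 * by - A12 * bx) / det;
    x += dx;
    y += dy;
    // Converged once the update is smaller than the configured epsilon
    return (dx * dx + dy * dy) < epsilon * epsilon;
}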
+ * + * @attention The second input tensor must have at least 2 dimensions (matrix) + * + */ +class CLLocallyConnectedMatrixMultiplyKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLLocallyConnectedMatrixMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLocallyConnectedMatrixMultiplyKernel(const CLLocallyConnectedMatrixMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLocallyConnectedMatrixMultiplyKernel &operator=(const CLLocallyConnectedMatrixMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + CLLocallyConnectedMatrixMultiplyKernel(CLLocallyConnectedMatrixMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + CLLocallyConnectedMatrixMultiplyKernel &operator=(CLLocallyConnectedMatrixMultiplyKernel &&) = default; + /** Initialise the kernel's input, output and alpha + * + * @param[in] input0 First input tensor. Data types supported: F32 + * @param[in] input1 Second input tensor. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result. Data type supported: same as @p input0 + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); + /** Initialise the kernel's input, output and alpha + * + * @param[in] compile_context The compile context to be used. + * @param[in] input0 First input tensor. Data types supported: F32 + * @param[in] input1 Second input tensor. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result. Data type supported: same as @p input0 + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLLocallyConnectedMatrixMultiplyKernel + * + * @param[in] input0 First input tensor info. Data types supported: F32 + * @param[in] input1 Second input tensor info. Data type supported: same as @p input0 + * @param[in] output Output tensor info. Data type supported: same as @p input0 + * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H */ diff --git a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp b/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp index dc130d0ff9..9845dd6169 100644 --- a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp +++ b/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h" +#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLMagnitudePhaseKernel.h b/src/core/CL/kernels/CLMagnitudePhaseKernel.h new file mode 100644 index 0000000000..514036b2ff --- /dev/null +++ b/src/core/CL/kernels/CLMagnitudePhaseKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
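// Editorial note, not part of the patch: a scalar sketch of the operation documented above for
// CLLocallyConnectedMatrixMultiplyKernel. Each row r of input0 is multiplied by the r-th 2D slice
// of input1, so unlike a plain GEMM the weights differ per output position (shapes and names are
// illustrative assumptions only).
void locally_connected_mm_ref(const float *input0, // rows x k
                              const float *input1, // rows x k x n, one k x n matrix per row
                              float *output,       // rows x n
                              int rows, int k, int n)
{
    for(int r = 0; r < rows; ++r)
    {
        const float *w = input1 + r * k * n; // weight matrix dedicated to this row
        for(int j = 0; j < n; ++j)
        {
            float acc = 0.f;
            for(int i = 0; i < k; ++i)
            {
                acc += input0[r * k + i] * w[i * n + j];
            }
            output[r * n + j] = acc;
        }
    }
}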
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H +#define ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Template interface for the kernel to compute magnitude and phase. + * + */ +class CLMagnitudePhaseKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLMagnitudePhaseKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMagnitudePhaseKernel(const CLMagnitudePhaseKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMagnitudePhaseKernel &operator=(const CLMagnitudePhaseKernel &) = delete; + /** Allow instances of this class to be moved */ + CLMagnitudePhaseKernel(CLMagnitudePhaseKernel &&) = default; + /** Allow instances of this class to be moved */ + CLMagnitudePhaseKernel &operator=(CLMagnitudePhaseKernel &&) = default; + /** Initialise the kernel's input, output. + * + * @note At least one of output1 or output2 must be set. + * + * @param[in] gx The input gradient X tensor. Data types supported: S16/S32. + * @param[in] gy The input gradient Y tensor. Data types supported: S16/S32. + * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16/S32. + * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8. + * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM. + * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. + */ + void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, + MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED); + /** Initialise the kernel's input, output. + * + * @note At least one of output1 or output2 must be set. + * + * @param[in] compile_context The compile context to be used. + * @param[in] gx The input gradient X tensor. Data types supported: S16/S32. + * @param[in] gy The input gradient Y tensor. Data types supported: S16/S32. + * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16/S32. + * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8. + * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM. 
+ * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, + MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_gx; /**< Input gradient X. */ + const ICLTensor *_gy; /**< Input gradient Y. */ + ICLTensor *_magnitude; /**< Output - Magnitude. */ + ICLTensor *_phase; /**< Output - Phase. */ + bool _run_mag; /**< Calculate magnitude ? */ + bool _run_phase; /**< Calculate phase ? */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H */ diff --git a/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp index a78996ddae..2a1312af94 100644 --- a/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h" +#include "src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h new file mode 100644 index 0000000000..86267ec0f7 --- /dev/null +++ b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
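// Editorial note, not part of the patch: a scalar sketch of the two optional outputs described
// above, for the default MagnitudeType::L2NORM case. The exact U8 encoding of the phase used by
// the OpenCL kernel is defined by its reference implementation; the angle mapping below is one
// plausible choice, shown only to illustrate the shape of the computation.
#include <cmath>
#include <cstdint>
void magnitude_phase_ref(int16_t gx, int16_t gy, int16_t *magnitude, uint8_t *phase)
{
    if(magnitude != nullptr)
    {
        // L2NORM magnitude: sqrt(gx^2 + gy^2)
        *magnitude = static_cast<int16_t>(std::lround(std::sqrt(float(gx) * gx + float(gy) * gy)));
    }
    if(phase != nullptr)
    {
        // Signed phase: angle in [0, 360) degrees, compressed onto the U8 range
        float angle = std::atan2(float(gy), float(gx)) * 180.f / 3.14159265f;
        if(angle < 0.f)
        {
            angle += 360.f;
        }
        *phase = static_cast<uint8_t>(angle * 256.f / 360.f);
    }
}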
+ */ +#ifndef ARM_COMPUTE_CLMAXUNPOOLINGLAYERKERNEL_H +#define ARM_COMPUTE_CLMAXUNPOOLINGLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the pooling layer kernel */ +class CLMaxUnpoolingLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLMaxUnpoolingLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMaxUnpoolingLayerKernel(const CLMaxUnpoolingLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMaxUnpoolingLayerKernel &operator=(const CLMaxUnpoolingLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLMaxUnpoolingLayerKernel(CLMaxUnpoolingLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLMaxUnpoolingLayerKernel &operator=(CLMaxUnpoolingLayerKernel &&) = default; + /** Default destructor */ + ~CLMaxUnpoolingLayerKernel() = default; + /** Set the input and output tensors. + * + * @note Output shape must be equal to the shape of the original input to pool. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] indices Tensor containing the offset to store the input elements in the output tensor. + * @ref CLPoolingLayerKernel with indices should precede this function in order to + * properly reconstruct the output tensor. + * The tensor shape of this tensor has to be equal to the input tensor shape. Data type supported: U32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, const PoolingLayerInfo &pool_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLMaxUnpoolingLayerKernel + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] output Destination tensor info. Data types supported: Same as @p input. + * @param[in] indices TensorInfo associated to the tensor containing the offset to store the input elements in the output tensor. + * @ref CLPoolingLayerKernel with indices should precede this function in order to + * properly reconstruct the output tensor. + * The tensor shape of this tensor has to be equal to the input tensor shape. Data type supported: U32. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info); + + // Inherited methods overridden + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + const ICLTensor *_indices; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLMAXUNPOOLINGLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp index 5acc3ac3d6..aed6e6eaf7 100644 --- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp +++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
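// Editorial note, not part of the patch: a scalar sketch of max unpooling as documented above.
// The indices tensor produced by the preceding pooling kernel tells each pooled value where to
// go; every other output element stays zero (illustrative only, not the OpenCL implementation).
#include <cstdint>
void max_unpooling_ref(const float *input, const uint32_t *indices, float *output,
                       int input_elements, int output_elements)
{
    for(int i = 0; i < output_elements; ++i)
    {
        output[i] = 0.f; // positions not selected by the max operation remain zero
    }
    for(int i = 0; i < input_elements; ++i)
    {
        output[indices[i]] = input[i]; // scatter each pooled maximum back to its original position
    }
}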
*/ -#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h" +#include "src/core/CL/kernels/CLMeanStdDevKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.h b/src/core/CL/kernels/CLMeanStdDevKernel.h new file mode 100644 index 0000000000..179a2025b7 --- /dev/null +++ b/src/core/CL/kernels/CLMeanStdDevKernel.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLMEANSTDDEVKERNEL_H +#define ARM_COMPUTE_CLMEANSTDDEVKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace cl +{ +class Buffer; +} + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the kernel to calculate mean and standard deviation of input image pixels. */ +class CLMeanStdDevKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLMeanStdDevKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMeanStdDevKernel(const CLMeanStdDevKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMeanStdDevKernel &operator=(const CLMeanStdDevKernel &) = delete; + /** Allow instances of this class to be moved */ + CLMeanStdDevKernel(CLMeanStdDevKernel &&) = default; + /** Allow instances of this class to be moved */ + CLMeanStdDevKernel &operator=(CLMeanStdDevKernel &&) = default; + /** Initialise the kernel's input and outputs. + * + * @param[in] input Input image. Data types supported: U8. + * @param[out] mean Input average pixel value. + * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong). + * @param[out] stddev (Optional) Output standard deviation of pixel values. + * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong). + */ + void configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr); + /** Initialise the kernel's input and outputs. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input image. Data types supported: U8. + * @param[out] mean Input average pixel value. + * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong). 
+ * @param[out] stddev (Optional) Output standard deviation of pixel values. + * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong). + */ + void configure(const CLCompileContext &compile_context, const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr); + /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevKernel. + * + * @param[in] input Input image info. Data types supported: U8. + * @param[in] mean Input average pixel value. + * @param[in] global_sum Keeps global sum of pixel values. + * @param[in] stddev (Optional) Output standard deviation of pixel values. + * @param[in] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + + BorderSize border_size() const override; + +private: + const ICLImage *_input; + float *_mean; + float *_stddev; + cl::Buffer *_global_sum; + cl::Buffer *_global_sum_squared; + BorderSize _border_size; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLMEANSTDDEVKERNEL_H */ diff --git a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp index 82a22a9f19..a889df7930 100644 --- a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp +++ b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h" +#include "src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h new file mode 100644 index 0000000000..a1ba2b905e --- /dev/null +++ b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
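// Editorial note, not part of the patch: the kernel accumulates the global sums into the
// cl::Buffer objects, and the scalar mean/stddev are derived from those sums afterwards.
// A sketch of that final step, assuming 'sum' and 'sum_sq' have already been read back and
// 'num_pixels' is the number of image pixels (names are illustrative):
#include <cmath>
#include <cstdint>
void finalize_mean_stddev_sketch(uint64_t sum, uint64_t sum_sq, uint64_t num_pixels,
                                 float &mean, float &stddev)
{
    mean = static_cast<float>(sum) / num_pixels;
    // Var(X) = E[X^2] - E[X]^2
    const float mean_sq = static_cast<float>(sum_sq) / num_pixels;
    stddev = std::sqrt(mean_sq - mean * mean);
}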
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H +#define ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to normalize the input 2D tensor across the first dimension with respect to mean and standard deviation of the same dimension. */ +class CLMeanStdDevNormalizationKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLMeanStdDevNormalizationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMeanStdDevNormalizationKernel(const CLMeanStdDevNormalizationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMeanStdDevNormalizationKernel &operator=(const CLMeanStdDevNormalizationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLMeanStdDevNormalizationKernel(CLMeanStdDevNormalizationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLMeanStdDevNormalizationKernel &operator=(CLMeanStdDevNormalizationKernel &&) = default; + /** Default destructor */ + ~CLMeanStdDevNormalizationKernel() = default; + /** Initialise the kernel's input and outputs. + * + * @note If the output tensor is a nullptr, the normalization will be performed in-place. + * + * @param[in, out] input Source tensor with 2 dimensions. In case of @p output tensor = nullptr, + * this tensor will store the result of the normalization. Data types supported: F16/F32. + * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input + * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. + */ + void configure(ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f); + /** Initialise the kernel's input and outputs. + * + * @note If the output tensor is a nullptr, the normalization will be performed in-place. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor with 2 dimensions. In case of @p output tensor = nullptr, + * this tensor will store the result of the normalization. Data types supported: F16/F32. + * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input + * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f); + /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevNormalizationKernel + * + * @param[in] input Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr, + * this tensor will store the result of the normalization. Data types supported: F16/F32. + * @param[in] output (Optional) Destination tensor info. It can be nullptr in case of in-place computation. 
Data type supported: same as @p input + * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output = nullptr, float epsilon = 1e-8f); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_input; + ICLTensor *_output; + bool _run_in_place; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H */ diff --git a/src/core/CL/kernels/CLMedian3x3Kernel.cpp b/src/core/CL/kernels/CLMedian3x3Kernel.cpp index 4b899502f9..23a21d6b19 100644 --- a/src/core/CL/kernels/CLMedian3x3Kernel.cpp +++ b/src/core/CL/kernels/CLMedian3x3Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLMedian3x3Kernel.h" +#include "src/core/CL/kernels/CLMedian3x3Kernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLMedian3x3Kernel.h b/src/core/CL/kernels/CLMedian3x3Kernel.h new file mode 100644 index 0000000000..8cc5ed7279 --- /dev/null +++ b/src/core/CL/kernels/CLMedian3x3Kernel.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLMEDIAN3X3KERNEL_H +#define ARM_COMPUTE_CLMEDIAN3X3KERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the median 3x3 filter kernel. + * + */ +class CLMedian3x3Kernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. 
False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLMEDIAN3X3KERNEL_H */ diff --git a/src/core/CL/kernels/CLMemsetKernel.cpp b/src/core/CL/kernels/CLMemsetKernel.cpp index 186ed2a38c..2543b07a1a 100644 --- a/src/core/CL/kernels/CLMemsetKernel.cpp +++ b/src/core/CL/kernels/CLMemsetKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLMemsetKernel.h" +#include "src/core/CL/kernels/CLMemsetKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/CL/kernels/CLMemsetKernel.h b/src/core/CL/kernels/CLMemsetKernel.h new file mode 100644 index 0000000000..dc103f580f --- /dev/null +++ b/src/core/CL/kernels/CLMemsetKernel.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLMEMSETKERNEL_H +#define ARM_COMPUTE_CLMEMSETKERNEL_H + +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for filling the planes of a tensor */ +class CLMemsetKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLMemsetKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMemsetKernel(const CLMemsetKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMemsetKernel &operator=(const CLMemsetKernel &) = delete; + /** Allow instances of this class to be moved */ + CLMemsetKernel(CLMemsetKernel &&) = default; + /** Allow instances of this class to be moved */ + CLMemsetKernel &operator=(CLMemsetKernel &&) = default; + /** Default destructor */ + ~CLMemsetKernel() = default; + + /** Initialise the kernel's tensor and filling value + * + * @param[in,out] tensor Input tensor to fill. Supported data types: All. + * @param[in] constant_value The value used to fill the planes of the tensor + * @param[in] window Window to be used in case setting only part of a tensor. 
Default is nullptr. + */ + void configure(ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr); + /** Initialise the kernel's tensor and filling value + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] tensor Input tensor to fill. Supported data types: All. + * @param[in] constant_value The value used to fill the planes of the tensor + * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr); + /** Static function to check if given info will lead to a valid configuration of @ref CLMemsetKernel + * + * @param[in] tensor Source tensor info. Data types supported: All. + * @param[in] constant_value The value used to fill the planes of the tensor + * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr. + * + * @return a status + */ + static Status validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *window = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_tensor; + Window _full_window; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLMEMSETRKERNEL_H */ diff --git a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp index bf645f82e9..7017efa3c2 100644 --- a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h" +#include "src/core/CL/kernels/CLMinMaxLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLMinMaxLayerKernel.h b/src/core/CL/kernels/CLMinMaxLayerKernel.h new file mode 100644 index 0000000000..aa2ff3f375 --- /dev/null +++ b/src/core/CL/kernels/CLMinMaxLayerKernel.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
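A short usage sketch for the memset kernel documented above (the tensor and its shape are made up; it assumes CLScheduler::get().default_init() has already been called):

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "src/core/CL/kernels/CLMemsetKernel.h"

    using namespace arm_compute;

    void fill_with_zeros()
    {
        CLTensor dst;
        dst.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::F32));
        dst.allocator()->allocate();

        CLMemsetKernel fill;
        fill.configure(&dst, PixelValue(0.f)); // no Window given -> the whole tensor is filled
        CLScheduler::get().enqueue(fill);
    }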
+ */ +#ifndef ARM_COMPUTE_CLMINMAXLAYERKERNEL_H +#define ARM_COMPUTE_CLMINMAXLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to perform min max search on a 3D tensor. + */ +class CLMinMaxLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLMinMaxLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMinMaxLayerKernel(const CLMinMaxLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMinMaxLayerKernel &operator=(const CLMinMaxLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLMinMaxLayerKernel(CLMinMaxLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLMinMaxLayerKernel &operator=(CLMinMaxLayerKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.Data types supported: F32. + * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor. + * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32. + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.Data types supported: F32. + * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor. + * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLMinMaxLayerKernel + * + * @param[in] input Input tensor info. Data types supported: F32. + * @param[in] output Output tensor info with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor. + * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + /** Resets global minimum and maximum + * + * @param[in,out] queue Command queue on which to map and unmap the min_max tensor + */ + void reset(cl::CommandQueue &queue); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLMINMAXLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp index 634b58077a..675cfc19a9 100644 --- a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp +++ b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
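A configure/validate sketch for the min-max layer kernel above (shapes are illustrative: a [W, H, C, batches] input with batches = 2, so the output is [2, batches]; assumes an initialised CLScheduler):

    #include "arm_compute/core/Error.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "src/core/CL/kernels/CLMinMaxLayerKernel.h"

    using namespace arm_compute;

    void min_max_per_batch()
    {
        CLTensor input, minmax;
        input.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U, 2U), 1, DataType::F32)); // [W, H, C, batches]
        minmax.allocator()->init(TensorInfo(TensorShape(2U, 2U), 1, DataType::F32));          // [2, batches]
        input.allocator()->allocate();
        minmax.allocator()->allocate();

        ARM_COMPUTE_ERROR_THROW_ON(CLMinMaxLayerKernel::validate(input.info(), minmax.info()));
        CLMinMaxLayerKernel mm;
        mm.configure(&input, &minmax);
        mm.reset(CLScheduler::get().queue()); // seed the running min/max before the first run
        CLScheduler::get().enqueue(mm);
    }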
 */
-#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h"
+#include "src/core/CL/kernels/CLMinMaxLocationKernel.h"

 #include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/src/core/CL/kernels/CLMinMaxLocationKernel.h b/src/core/CL/kernels/CLMinMaxLocationKernel.h
new file mode 100644
index 0000000000..2196abe033
--- /dev/null
+++ b/src/core/CL/kernels/CLMinMaxLocationKernel.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H
+#define ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "src/core/CL/ICLKernel.h"
+
+#include <array>
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface for the kernel to perform min max search on an image.
+ */
+class CLMinMaxKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLMinMaxKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMinMaxKernel(const CLMinMaxKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMinMaxKernel &operator=(const CLMinMaxKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLMinMaxKernel(CLMinMaxKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLMinMaxKernel &operator=(CLMinMaxKernel &&) = default;
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input   Input Image. Data types supported: U8/S16/F32.
+     * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
+     */
+    void configure(const ICLImage *input, cl::Buffer *min_max);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input Image. Data types supported: U8/S16/F32.
+     * @param[out] min_max         Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLTensor   *_input;             /**< Input image. */
+    cl::Buffer        *_min_max;           /**< Minimum/maximum value. */
+    std::array<int, 2> _data_type_max_min; /**< Maximum and minimum data type value respectively. */
+};
+
+/** Interface for the kernel to find min max locations of an image.
+ */
+class CLMinMaxLocationKernel : public ICLKernel
+{
+public:
+    /** Constructor */
+    CLMinMaxLocationKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMinMaxLocationKernel(const CLMinMaxLocationKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMinMaxLocationKernel &operator=(const CLMinMaxLocationKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLMinMaxLocationKernel(CLMinMaxLocationKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLMinMaxLocationKernel &operator=(CLMinMaxLocationKernel &&) = default;
+    /** Initialise the kernel's input and outputs.
+     *
+     * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
+     *
+     * @param[in]  input         Input image. Data types supported: U8/S16/F32.
+     * @param[out] min_max       Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
+     * @param[out] min_max_count Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32
+     * @param[out] min_loc       (Optional) Array of Coordinates2D used to store minimum value locations.
+     * @param[out] max_loc       (Optional) Array of Coordinates2D used to store maximum value locations.
+     */
+    void configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count,
+                   ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr);
+    /** Initialise the kernel's input and outputs.
+     *
+     * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input image. Data types supported: U8/S16/F32.
+     * @param[out] min_max         Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
+     * @param[out] min_max_count   Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32
+     * @param[out] min_loc         (Optional) Array of Coordinates2D used to store minimum value locations.
+     * @param[out] max_loc         (Optional) Array of Coordinates2D used to store maximum value locations.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count,
+                   ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLImage *_input;         /**< Input image. */
+    cl::Buffer     *_min_max_count; /**< Minimum/maximum value occurrences. */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H */
diff --git a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp b/src/core/CL/kernels/CLNonLinearFilterKernel.cpp
index 0a8472bf04..c73acaf1d8 100644
--- a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp
+++ b/src/core/CL/kernels/CLNonLinearFilterKernel.cpp
@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h"
+#include "src/core/CL/kernels/CLNonLinearFilterKernel.h"

 #include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/src/core/CL/kernels/CLNonLinearFilterKernel.h b/src/core/CL/kernels/CLNonLinearFilterKernel.h
new file mode 100644
index 0000000000..ed42063d2b
--- /dev/null
+++ b/src/core/CL/kernels/CLNonLinearFilterKernel.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H
+#define ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to apply a non-linear filter */
+class CLNonLinearFilterKernel : public ICLSimple2DKernel
+{
+public:
+    /** Default constructor */
+    CLNonLinearFilterKernel();
+    /** Set the source, destination and border mode of the kernel
+     *
+     * @param[in]  input            Source tensor. Data types supported: U8
+     * @param[out] output           Destination tensor. Data types supported: U8
+     * @param[in]  function         Non linear function to perform
+     * @param[in]  mask_size        Mask size. Supported sizes: 3, 5
+     * @param[in]  pattern          Mask pattern
+     * @param[in]  mask             The given mask. Will be used only if pattern is specified to PATTERN_OTHER
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function,
+                   unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
+                   bool border_undefined);
+    /** Set the source, destination and border mode of the kernel
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. Data types supported: U8
+     * @param[out] output           Destination tensor.
Data types supported: U8 + * @param[in] function Non linear function to perform + * @param[in] mask_size Mask size. Supported sizes: 3, 5 + * @param[in] pattern Mask pattern + * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, + unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, + bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; + +private: + BorderSize _border_size; /**< Border size */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H */ diff --git a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp index 9c6d44b6c5..7d5c5ba7e1 100644 --- a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp +++ b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h" +#include "src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h new file mode 100644 index 0000000000..d9ed60ce6b --- /dev/null +++ b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H +#define ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface to perform Non-Maxima suppression over a 3x3 window using OpenCL + * + * @note Used by @ref CLFastCorners and @ref CLHarrisCorners + */ +class CLNonMaximaSuppression3x3Kernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's sources, destinations and border mode. + * + * @param[in] input Source tensor. Data types supported: U8, F32. 
(Must be the same as the output tensor) + * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor) + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + /** Initialise the kernel's sources, destinations and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor) + * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor) + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H */ diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp index 686e6f1b26..d1982e77b9 100644 --- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h" +#include "src/core/CL/kernels/CLNormalizationLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.h b/src/core/CL/kernels/CLNormalizationLayerKernel.h new file mode 100644 index 0000000000..739a2ae9f1 --- /dev/null +++ b/src/core/CL/kernels/CLNormalizationLayerKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H +#define ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the normalization layer kernel. 
+ */ +class CLNormalizationLayerKernel : public ICLKernel +{ +public: + /** Constructor */ + CLNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLNormalizationLayerKernel(const CLNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLNormalizationLayerKernel &operator=(const CLNormalizationLayerKernel &) = delete; + /** Default Move Constructor. */ + CLNormalizationLayerKernel(CLNormalizationLayerKernel &&) = default; + /** Default move assignment operator */ + CLNormalizationLayerKernel &operator=(CLNormalizationLayerKernel &&) = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input. + * Data layouts supported: same as @p input. + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. + */ + void configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input. + * Data layouts supported: same as @p input. + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizationLayerKernel + * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. + * @param[in] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input. + * Data layouts supported: same as @p input. + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. 
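To make the configure/validate contract above concrete, a minimal sketch (made-up [W, H, C] shape, CROSS_MAP normalization over 5 elements; assumes an initialised CLScheduler):

    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "src/core/CL/kernels/CLNormalizationLayerKernel.h"

    using namespace arm_compute;

    void cross_map_normalization()
    {
        CLTensor src, dst;
        const TensorInfo info(TensorShape(32U, 32U, 8U), 1, DataType::F32);
        src.allocator()->init(info);
        dst.allocator()->init(info);
        src.allocator()->allocate();
        dst.allocator()->allocate();

        const NormalizationLayerInfo norm_info(NormType::CROSS_MAP, 5U);
        ARM_COMPUTE_ERROR_THROW_ON(CLNormalizationLayerKernel::validate(src.info(), dst.info(), norm_info));
        CLNormalizationLayerKernel norm;
        norm.configure(&src, &dst, norm_info);
        CLScheduler::get().enqueue(norm);
    }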
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, NormalizationLayerInfo norm_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + BorderSize _border_size; + bool _is_norm_across_width; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp index 407ce6626b..18cbe217be 100644 --- a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp +++ b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h" +#include "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h new file mode 100644 index 0000000000..6db4433e78 --- /dev/null +++ b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H +#define ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the NormalizePlanarYUV layer kernel. */ +class CLNormalizePlanarYUVLayerKernel : public ICLKernel +{ +public: + /** Constructor */ + CLNormalizePlanarYUVLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLNormalizePlanarYUVLayerKernel(const CLNormalizePlanarYUVLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLNormalizePlanarYUVLayerKernel &operator=(const CLNormalizePlanarYUVLayerKernel &) = delete; + /** Default Move Constructor. 
*/ + CLNormalizePlanarYUVLayerKernel(CLNormalizePlanarYUVLayerKernel &&) = default; + /** Default move assignment operator */ + CLNormalizePlanarYUVLayerKernel &operator=(CLNormalizePlanarYUVLayerKernel &&) = default; + /** Default destructor */ + ~CLNormalizePlanarYUVLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels]. + * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input + * @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels. + * Data types supported: same as @p input + */ + void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels]. + * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input + * @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels. + * Data types supported: same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std); + /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizePlanarYUVLayerKernel + * + * @param[in] input Source tensor info. 3 lower dimensions represent a single input with dimensions [width, height, channels]. + * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor info. Data type supported: same as @p input + * @param[in] mean Mean values tensor info. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input + * @param[in] std Standard deviation values tensor info. 1 dimension with size equal to the number of input channels. + * Data types supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + const ICLTensor *_mean; + const ICLTensor *_std; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLPadLayerKernel.cpp b/src/core/CL/kernels/CLPadLayerKernel.cpp index 45729738fb..485676667c 100644 --- a/src/core/CL/kernels/CLPadLayerKernel.cpp +++ b/src/core/CL/kernels/CLPadLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
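A sketch of the per-channel mean/std usage documented above (shapes are made up: a [W, H, C] input with C = 3 and 1-D mean/std tensors of length C; assumes an initialised CLScheduler):

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"

    using namespace arm_compute;

    void normalize_planar_yuv()
    {
        CLTensor src, dst, mean, std_dev;
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::F32));
        mean.allocator()->init(TensorInfo(TensorShape(3U), 1, DataType::F32));    // one value per channel
        std_dev.allocator()->init(TensorInfo(TensorShape(3U), 1, DataType::F32)); // one value per channel
        for(auto *t : { &src, &dst, &mean, &std_dev })
        {
            t->allocator()->allocate();
        }

        CLNormalizePlanarYUVLayerKernel norm;
        norm.configure(&src, &dst, &mean, &std_dev);
        CLScheduler::get().enqueue(norm);
    }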
*/ -#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h" +#include "src/core/CL/kernels/CLPadLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLPadLayerKernel.h b/src/core/CL/kernels/CLPadLayerKernel.h new file mode 100644 index 0000000000..2b0abb18df --- /dev/null +++ b/src/core/CL/kernels/CLPadLayerKernel.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLPADLAYERKERNEL_H +#define ARM_COMPUTE_CLPADLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the PadLayer function. */ +class CLPadLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLPadLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPadLayerKernel(const CLPadLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPadLayerKernel &operator=(const CLPadLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLPadLayerKernel(CLPadLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLPadLayerKernel &operator=(CLPadLayerKernel &&) = default; + /** Default destructor */ + ~CLPadLayerKernel() = default; + /** Set the input and output tensor. + * + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: same as @p input + * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] + * specifies the front and the end padding in the i-th dimension. + * @param[in] constant_value (Optional) Constant value to be used for the padding. + * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT, + * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT). + */ + void configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT); + /** Set the input and output tensor. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Output tensor. 
Data type supported: same as @p input + * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] + * specifies the front and the end padding in the i-th dimension. + * @param[in] constant_value (Optional) Constant value to be used for the padding. + * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT, + * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT). + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), + PaddingMode mode = PaddingMode::CONSTANT); + /** Static function to check if given info will lead to a valid configuration of @ref CLPadLayerKernel + * + * @param[in] input Source tensor info. Data types supported: All. + * @param[in] output Output tensor info. Data type supported: same as @p input + * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] + * specifies the front and the end padding in the i-th dimension. + * @param[in] constant_value (Optional) Constant value to be used for the padding. + * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT, + * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT). + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + int _input_start_x; + int _input_start_y; + bool _4d_enabled; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLPADLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLPermuteKernel.cpp b/src/core/CL/kernels/CLPermuteKernel.cpp index 620665791f..4d289f28e6 100644 --- a/src/core/CL/kernels/CLPermuteKernel.cpp +++ b/src/core/CL/kernels/CLPermuteKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLPermuteKernel.h" +#include "src/core/CL/kernels/CLPermuteKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/helpers/AutoConfiguration.h" diff --git a/src/core/CL/kernels/CLPermuteKernel.h b/src/core/CL/kernels/CLPermuteKernel.h new file mode 100644 index 0000000000..d1bb875d7a --- /dev/null +++ b/src/core/CL/kernels/CLPermuteKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
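A constant-padding sketch for the interface above (one element of padding on both sides of the first two dimensions of a made-up 8x8 tensor, so the output is 10x10; assumes an initialised CLScheduler):

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "src/core/CL/kernels/CLPadLayerKernel.h"

    using namespace arm_compute;

    void pad_with_zeros()
    {
        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(10U, 10U), 1, DataType::F32));
        src.allocator()->allocate();
        dst.allocator()->allocate();

        const PaddingList padding = { { 1, 1 }, { 1, 1 } }; // (before, after) per dimension
        CLPadLayerKernel pad;
        pad.configure(&src, &dst, padding, PixelValue(0.f), PaddingMode::CONSTANT);
        CLScheduler::get().enqueue(pad);
    }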
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLPERMUTEKERNEL_H +#define ARM_COMPUTE_CLPERMUTEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform tensor permutation. + * + * Permutes given a permutation vector + */ +class CLPermuteKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLPermuteKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPermuteKernel(const CLPermuteKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPermuteKernel &operator=(const CLPermuteKernel &) = delete; + /** Allow instances of this class to be moved */ + CLPermuteKernel(CLPermuteKernel &&) = default; + /** Allow instances of this class to be moved */ + CLPermuteKernel &operator=(CLPermuteKernel &&) = default; + /** Set the input and output of the kernel. + * + * @note Arbitrary permutation vectors are supported with rank not greater than 4 + * + * @param[in] input The input tensor to permute. Data types supported: All. + * @param[in] output The output tensor. Data types supported: Same as @p input + * @param[in] perm Permutation vector + */ + void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm); + /** Set the input and output of the kernel. + * + * @note Arbitrary permutation vectors are supported with rank not greater than 4 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to permute. Data types supported: All. + * @param[in] output The output tensor. Data types supported: Same as @p input + * @param[in] perm Permutation vector + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PermutationVector &perm); + /** Static function to check if given info will lead to a valid configuration of @ref CLPermuteKernel + * + * @note Arbitrary permutation vectors are supported with rank not greater than 4 + * + * @param[in] input First tensor input info. Data types supported: All. + * @param[in] output Output tensor info. Data types supported: same as @p input. + * @param[in] perm Permutation vector + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + PermutationVector _perm; +}; +} // arm_compute +#endif /*ARM_COMPUTE_CLPERMUTEKERNEL_H */ diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp index a7bd4dad60..a6255f8018 100644 --- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp +++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
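A small sketch of the permutation kernel above (made-up shapes; the semantics assumed here are that output dimension i takes input dimension perm[i], so PermutationVector(2U, 0U, 1U) turns a [W, H, C] shape into [C, W, H]; assumes an initialised CLScheduler):

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "src/core/CL/kernels/CLPermuteKernel.h"

    using namespace arm_compute;

    void permute_whc_to_cwh()
    {
        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::F32)); // [W, H, C]
        dst.allocator()->init(TensorInfo(TensorShape(3U, 16U, 16U), 1, DataType::F32)); // [C, W, H]
        src.allocator()->allocate();
        dst.allocator()->allocate();

        CLPermuteKernel permute;
        permute.configure(&src, &dst, PermutationVector(2U, 0U, 1U));
        CLScheduler::get().enqueue(permute);
    }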
*/ -#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h" +#include "src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h new file mode 100644 index 0000000000..0cc4005875 --- /dev/null +++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H +#define ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ICLTensor; + +/** Interface for the pixelwise multiplication kernel. */ +class CLPixelWiseMultiplicationKernel : public ICLKernel +{ +public: + /** Default constructor.*/ + CLPixelWiseMultiplicationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPixelWiseMultiplicationKernel(const CLPixelWiseMultiplicationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPixelWiseMultiplicationKernel &operator=(const CLPixelWiseMultiplicationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLPixelWiseMultiplicationKernel(CLPixelWiseMultiplicationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLPixelWiseMultiplicationKernel &operator=(CLPixelWiseMultiplicationKernel &&) = default; + /** Initialise the kernel's input, output and border mode. + * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,U8) -> S16 + * - (S16,S16) -> S16 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * - (QASYMM8,QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (QSYMM16,QSYMM16) -> QSYMM16 + * - (QSYMM16,QSYMM16) -> S32 + * + * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. + * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. + * @param[out] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. 
+ * @param[in] scale Scale to apply after multiplication. + * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. + * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate + * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, float scale, + ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Initialise the kernel's input, output and border mode. + * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,U8) -> S16 + * - (S16,S16) -> S16 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * - (QASYMM8,QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (QSYMM16,QSYMM16) -> QSYMM16 + * - (QSYMM16,QSYMM16) -> S32 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. + * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. + * @param[out] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. + * @param[in] scale Scale to apply after multiplication. + * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. + * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate + * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, float scale, + ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplicationKernel + * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,U8) -> S16 + * - (S16,S16) -> S16 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * - (QASYMM8,QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (QSYMM16,QSYMM16) -> QSYMM16 + * - (QSYMM16,QSYMM16) -> S32 + * + * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. + * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. + * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. + * @param[in] scale Scale to apply after multiplication. + * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. + * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate + * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, + ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ITensorInfo *_input1; + const ITensorInfo *_input2; + ITensorInfo *_output; +}; + +/** Interface for the complex pixelwise multiplication kernel. */ +class CLComplexPixelWiseMultiplicationKernel : public ICLKernel +{ +public: + /** Default constructor.*/ + CLComplexPixelWiseMultiplicationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLComplexPixelWiseMultiplicationKernel(const CLComplexPixelWiseMultiplicationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLComplexPixelWiseMultiplicationKernel &operator=(const CLComplexPixelWiseMultiplicationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLComplexPixelWiseMultiplicationKernel(CLComplexPixelWiseMultiplicationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLComplexPixelWiseMultiplicationKernel &operator=(CLComplexPixelWiseMultiplicationKernel &&) = default; + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2. + * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * @param[out] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2. + * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * @param[out] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref CLComplexPixelWiseMultiplicationKernel + * + * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2. + * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
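For orientation, a minimal usage sketch of the validate() contract documented above, covering the (F32,F32) -> F32 case with a scale of 1 (that is, 1/2^0). The sketch is not part of the patch: the helper name is hypothetical, it assumes compilation inside the library tree because this header becomes internal with this change, and application code would normally go through the public CLPixelWiseMultiplication runtime function instead.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"

using namespace arm_compute;

// Hypothetical helper: ask the kernel whether an (F32,F32) -> F32 multiply is a valid configuration.
Status check_f32_multiply()
{
    const TensorInfo in1(TensorShape(16U, 4U), 1, DataType::F32);
    const TensorInfo in2(TensorShape(16U, 4U), 1, DataType::F32);
    const TensorInfo out(TensorShape(16U, 4U), 1, DataType::F32);
    // A scale of 1.f satisfies the documented 1/2^n constraint (n = 0).
    return CLPixelWiseMultiplicationKernel::validate(&in1, &in2, &out, 1.f,
                                                     ConvertPolicy::WRAP, RoundingPolicy::TO_ZERO);
}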
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ITensorInfo *_input1; + const ITensorInfo *_input2; + ITensorInfo *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H */ diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp index 0570887b91..905610c31f 100644 --- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp @@ -21,11 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h" +#include "src/core/CL/kernels/CLPoolingLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Helpers.h" @@ -34,6 +33,7 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/AccessWindowStatic.h" #include "src/core/CL/CLValidate.h" +#include "src/core/CL/ICLKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" @@ -176,7 +176,7 @@ std::tuple validate_and_configure_window(ITenso case DataLayout::NHWC: { // Initialize border size - border_size = BorderSize(); + border_size = BorderSize(); num_elems_processed_per_iteration = adjust_vec_size(4, output->dimension(0)); win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.h b/src/core/CL/kernels/CLPoolingLayerKernel.h new file mode 100644 index 0000000000..d88402a792 --- /dev/null +++ b/src/core/CL/kernels/CLPoolingLayerKernel.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H +#define ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include "arm_compute/core/Error.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the pooling layer kernel */ +class CLPoolingLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLPoolingLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPoolingLayerKernel(const CLPoolingLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPoolingLayerKernel &operator=(const CLPoolingLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLPoolingLayerKernel(CLPoolingLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLPoolingLayerKernel &operator=(CLPoolingLayerKernel &&) = default; + /** Default destructor */ + ~CLPoolingLayerKernel() = default; + + /** Set the input and output tensors. + * + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32. + */ + void configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr); + /** Set the input and output tensors. + * + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr); + /** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayerKernel + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] output Destination tensor info. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32. 
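As a rough illustration of the pooling contract described above, the following sketch validates a 2x2 max pooling with stride 2 on an NHWC F32 tensor (shapes are [C, W, H, N] in that layout). It is not part of the patch, the helper name is hypothetical, and it assumes internal access to this header; the public entry point is the CLPoolingLayer runtime function.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLPoolingLayerKernel.h"

using namespace arm_compute;

// Hypothetical helper: 2x2 MAX pooling, stride 2, no padding, so an 8x8 plane becomes 4x4.
Status check_max_pool_2x2()
{
    TensorInfo src(TensorShape(16U, 8U, 8U, 1U), 1, DataType::F32);
    TensorInfo dst(TensorShape(16U, 4U, 4U, 1U), 1, DataType::F32);
    src.set_data_layout(DataLayout::NHWC);
    dst.set_data_layout(DataLayout::NHWC);
    const PoolingLayerInfo pool_info(PoolingType::MAX, 2, DataLayout::NHWC, PadStrideInfo(2, 2, 0, 0));
    return CLPoolingLayerKernel::validate(&src, &dst, pool_info);
}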
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +public: + const ICLTensor *_input; + ICLTensor *_output; + ICLTensor *_indices; + PoolingLayerInfo _pool_info; + DataLayout _data_layout; + BorderSize _border_size; + unsigned int _num_elems_processed_per_iteration; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp index 202e9fbb37..7b9caf0063 100644 --- a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp +++ b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h" +#include "src/core/CL/kernels/CLPriorBoxLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLPriorBoxLayerKernel.h b/src/core/CL/kernels/CLPriorBoxLayerKernel.h new file mode 100644 index 0000000000..6c369a7a4e --- /dev/null +++ b/src/core/CL/kernels/CLPriorBoxLayerKernel.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H +#define ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the PriorBox layer kernel. */ +class CLPriorBoxLayerKernel : public ICLKernel +{ +public: + /** Constructor */ + CLPriorBoxLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPriorBoxLayerKernel(const CLPriorBoxLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPriorBoxLayerKernel &operator=(const CLPriorBoxLayerKernel &) = delete; + /** Default Move Constructor. 
*/ + CLPriorBoxLayerKernel(CLPriorBoxLayerKernel &&) = default; + /** Default move assignment operator */ + CLPriorBoxLayerKernel &operator=(CLPriorBoxLayerKernel &&) = default; + /** Default destructor */ + ~CLPriorBoxLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC. + * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1 + * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data types and layouts supported: same as @p input1 + * @param[in] info Prior box layer info. + * @param[in] min Minimum prior box values + * @param[in] max Maximum prior box values + * @param[in] aspect_ratios Aspect ratio values + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max, cl::Buffer *aspect_ratios); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC. + * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1 + * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data types and layouts supported: same as @p input1 + * @param[in] info Prior box layer info. + * @param[in] min Minimum prior box values + * @param[in] max Maximum prior box values + * @param[in] aspect_ratios Aspect ratio values + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max, + cl::Buffer *aspect_ratios); + /** Static function to check if given info will lead to a valid configuration of @ref CLPriorBoxLayerKernel + * + * @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC. + * @param[in] input2 Second source tensor info. Data types and layouts supported: same as @p input1 + * @param[in] output Destination tensor info. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input1 + * @param[in] info Prior box layer info. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; + const ICLTensor *_input2; + ICLTensor *_output; + PriorBoxLayerInfo _info; + int _num_priors; + cl::Buffer *_min; + cl::Buffer *_max; + cl::Buffer *_aspect_ratios; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp index ff6cc86103..3a66d084b9 100644 --- a/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp +++ b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h" +#include "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "src/core/helpers/AutoConfiguration.h" diff --git a/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h new file mode 100644 index 0000000000..31085c37ba --- /dev/null +++ b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLQLSTMLAYERVNORMALIZATIONKERNEL_H +#define ARM_COMPUTE_CLQLSTMLAYERVNORMALIZATIONKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to do layer normalization. */ +class CLQLSTMLayerNormalizationKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLQLSTMLayerNormalizationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLQLSTMLayerNormalizationKernel(const CLQLSTMLayerNormalizationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLQLSTMLayerNormalizationKernel &operator=(const CLQLSTMLayerNormalizationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLQLSTMLayerNormalizationKernel(CLQLSTMLayerNormalizationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLQLSTMLayerNormalizationKernel &operator=(CLQLSTMLayerNormalizationKernel &&) = default; + /** Default destructor */ + ~CLQLSTMLayerNormalizationKernel() = default; + /** Initialise the kernel's input and outputs. + * + * @param[in] input Source tensor with 2 dimensions. Data types supported: QSYMM16. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] weight Weight tensor. Data types supported: Same as @p input. + * @param[in] bias Bias tensor. Data types supported: S32. + * + */ + void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *weight, const ICLTensor *bias); + /** Initialise the kernel's input and outputs. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor with 2 dimensions. Data types supported: QSYMM16. 
+ * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] weight Weight tensor. Data types supported: Same as @p input. + * @param[in] bias Bias tensor. Data types supported: S32. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *weight, const ICLTensor *bias); + /** Static function to check if given info will lead to a valid configuration of @ref CLQLSTMLayerNormalizationKernel + * + * @param[in] input Source tensor info with 2 dimensions. Data types supported: QSYMM16. + * @param[in] output Destination info tensor. Data type supported: same as @p input + * @param[in] weight Weight info tensor. Data types supported: Same as @p input. + * @param[in] bias Bias tensor info. Data types supported: S32. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_weight; + const ICLTensor *_bias; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLQLSTMLAYERVNORMALIZATIONKERNEL_H */ diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp index 44889b9407..76e703f0dd 100644 --- a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h" +#include "src/core/CL/kernels/CLQuantizationLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.h b/src/core/CL/kernels/CLQuantizationLayerKernel.h new file mode 100644 index 0000000000..e9d03decb3 --- /dev/null +++ b/src/core/CL/kernels/CLQuantizationLayerKernel.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H +#define ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the quantization layer kernel. 
+ * + * @note The implementation supports only 3D input tensors. + */ +class CLQuantizationLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLQuantizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLQuantizationLayerKernel(const CLQuantizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLQuantizationLayerKernel &operator=(const CLQuantizationLayerKernel &) = delete; + /** Default Move Constructor. */ + CLQuantizationLayerKernel(CLQuantizationLayerKernel &&) = default; + /** Default move assignment operator */ + CLQuantizationLayerKernel &operator=(CLQuantizationLayerKernel &&) = default; + /** Default destructor */ + ~CLQuantizationLayerKernel() = default; + /** Set the input, output. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. + * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. + * + * @note Output auto initialization is not supported by this kernel + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Set the input, output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. + * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. + * + * @note Output auto initialization is not supported by this kernel + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLQuantizationLayerKernel + * + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. + * @param[in] output Destination tensor info with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp index ca6c6fad1a..38eafc6e97 100644 --- a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp +++ b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h" +#include "src/core/CL/kernels/CLROIAlignLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLROIAlignLayerKernel.h b/src/core/CL/kernels/CLROIAlignLayerKernel.h new file mode 100644 index 0000000000..cbf0e00165 --- /dev/null +++ b/src/core/CL/kernels/CLROIAlignLayerKernel.h @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
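Stepping back to CLQuantizationLayerKernel documented above, a short sketch of its contract (not part of the patch, helper name hypothetical): quantizing an F32 tensor to QASYMM8 with an arbitrary scale and zero point. As noted above, output auto initialization is not supported, so the caller fully initializes the destination info.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLQuantizationLayerKernel.h"

using namespace arm_compute;

// Hypothetical helper: F32 -> QASYMM8 with scale 0.5 and zero point 10.
Status check_quantize_f32_to_qasymm8()
{
    const TensorInfo src(TensorShape(32U, 32U, 3U), 1, DataType::F32);
    const TensorInfo dst(TensorShape(32U, 32U, 3U), 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10));
    return CLQuantizationLayerKernel::validate(&src, &dst);
}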
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLROIALIGNLAYERKERNEL_H +#define ARM_COMPUTE_CLROIALIGNLAYERKERNEL_H + +#include "arm_compute/core/CL/ICLArray.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the RoIAlign kernel. + */ +class CLROIAlignLayerKernel : public ICLKernel +{ +public: + /** Constructor */ + CLROIAlignLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLROIAlignLayerKernel(const CLROIAlignLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLROIAlignLayerKernel &operator=(const CLROIAlignLayerKernel &) = delete; + /** Default Move Constructor. */ + CLROIAlignLayerKernel(CLROIAlignLayerKernel &&) = default; + /** Default move assignment operator. */ + CLROIAlignLayerKernel &operator=(CLROIAlignLayerKernel &&) = default; + /** Default destructor */ + ~CLROIAlignLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner + * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. + * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. + */ + void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. 
+ * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner + * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. + * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLROIAlignLayerKernel + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, + * otherwise same as @p input + * @param[in] output Destination tensor info. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. + * + * @return a Status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue); + +private: + const ICLTensor *_input; + ICLTensor *_output; + const ICLTensor *_rois; + ROIPoolingLayerInfo _pool_info; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLROIALIGNLAYERKERNEL_H*/ diff --git a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp index 55fe5a5321..43492a3d50 100644 --- a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h" +#include "src/core/CL/kernels/CLROIPoolingLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLROIPoolingLayerKernel.h b/src/core/CL/kernels/CLROIPoolingLayerKernel.h new file mode 100644 index 0000000000..35f42a9676 --- /dev/null +++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
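Before moving on, a sketch of the CLROIAlignLayerKernel contract documented above (not part of the patch, helper name hypothetical, shapes chosen for illustration): eight ROIs in the documented [5, N] layout are pooled to 7x7 bins from an NCHW F32 feature map, so the destination shape is [7, 7, C, 8].

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLROIAlignLayerKernel.h"

using namespace arm_compute;

// Hypothetical helper: 8 ROIs, 7x7 output bins, spatial scale 1, NCHW F32 input of shape [W, H, C].
Status check_roi_align()
{
    const TensorInfo input(TensorShape(16U, 16U, 4U), 1, DataType::F32);
    const TensorInfo rois(TensorShape(5U, 8U), 1, DataType::F32);
    TensorInfo        output(TensorShape(7U, 7U, 4U, 8U), 1, DataType::F32);
    const ROIPoolingLayerInfo pool_info(7U, 7U, 1.f);
    return CLROIAlignLayerKernel::validate(&input, &rois, &output, pool_info);
}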
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H +#define ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +#include "arm_compute/core/CL/ICLArray.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the ROI pooling layer kernel */ +class CLROIPoolingLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLROIPoolingLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLROIPoolingLayerKernel(const CLROIPoolingLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLROIPoolingLayerKernel &operator=(const CLROIPoolingLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLROIPoolingLayerKernel(CLROIPoolingLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLROIPoolingLayerKernel &operator=(CLROIPoolingLayerKernel &&) = default; + /** Default destructor */ + ~CLROIPoolingLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16/F32. + * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner + * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16 + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. + */ + void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: F16/F32. 
+ * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner + * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16 + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_rois; + ICLTensor *_output; + ROIPoolingLayerInfo _pool_info; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLRangeKernel.cpp b/src/core/CL/kernels/CLRangeKernel.cpp index a4c30b63c2..892f1c7c9f 100644 --- a/src/core/CL/kernels/CLRangeKernel.cpp +++ b/src/core/CL/kernels/CLRangeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLRangeKernel.h" +#include "src/core/CL/kernels/CLRangeKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLRangeKernel.h b/src/core/CL/kernels/CLRangeKernel.h new file mode 100644 index 0000000000..1b94a099ed --- /dev/null +++ b/src/core/CL/kernels/CLRangeKernel.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLRANGEKERNEL_H +#define ARM_COMPUTE_CLRANGEKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Kernel class for Range + * + * range generates a 1-D tensor containing a sequence of numbers that begins at 'start' and extends by increments + * of 'step' up to but not including 'end'. 
+ */ +class CLRangeKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLRangeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLRangeKernel(const CLRangeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLRangeKernel &operator=(const CLRangeKernel &) = delete; + /** Allow instances of this class to be moved */ + CLRangeKernel(CLRangeKernel &&) = default; + /** Allow instances of this class to be moved */ + CLRangeKernel &operator=(CLRangeKernel &&) = default; + /** Default destructor */ + ~CLRangeKernel() = default; + /** Initialize the kernel's output tensor, start, end and step of the sequence. + * + * @param[out] output Output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] start The starting value of the sequence. + * @param[in] end The ending (not including) value of the sequence. + * @param[in] step The gap between each pair of values in the sequence. + */ + void configure(ICLTensor *output, float start, float end, float step); + /** Initialize the kernel's output tensor, start, end and step of the sequence. + * + * @param[in] compile_context The compile context to be used. + * @param[out] output Output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] start The starting value of the sequence. + * @param[in] end The ending (not including) value of the sequence. + * @param[in] step The gap between each pair of values in the sequence. + */ + void configure(const CLCompileContext &compile_context, ICLTensor *output, float start, float end, float step); + /** Static function to check if given info will lead to a valid configuration of @ref CLRangeKernel + * + * @param[in] output Output tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] start The starting value of the sequence. + * @param[in] end The ending (not including) value of the sequence. + * @param[in] step The gap between each pair of values in the sequence. + * + * @return a status + */ + static Status validate(const ITensorInfo *output, float start, float end, float step); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + float _start; /**< Start of sequence */ + float _end; /**< End of sequence */ + float _step; /**< Increment/step value */ + ICLTensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLRANGEKERNEL_H */ diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp index 325e4b994c..9d49a2193a 100644 --- a/src/core/CL/kernels/CLReductionOperationKernel.cpp +++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h" +#include "src/core/CL/kernels/CLReductionOperationKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLReductionOperationKernel.h b/src/core/CL/kernels/CLReductionOperationKernel.h new file mode 100644 index 0000000000..ff9fd61484 --- /dev/null +++ b/src/core/CL/kernels/CLReductionOperationKernel.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
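Returning to CLRangeKernel above, a minimal sketch of the sequence semantics (not part of the patch, helper name hypothetical): the half-open range [0, 10) with step 1 produces ten values, so the destination must be a 1-D tensor of ten elements.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLRangeKernel.h"

using namespace arm_compute;

// Hypothetical helper: 0, 1, ..., 9 written into a 10-element F32 tensor.
Status check_range()
{
    const TensorInfo dst(TensorShape(10U), 1, DataType::F32);
    return CLRangeKernel::validate(&dst, 0.f, 10.f, 1.f);
}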
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H +#define ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the reduction operation kernel + */ +class CLReductionOperationKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLReductionOperationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLReductionOperationKernel(const CLReductionOperationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLReductionOperationKernel &operator=(const CLReductionOperationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLReductionOperationKernel(CLReductionOperationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLReductionOperationKernel &operator=(CLReductionOperationKernel &&) = default; + /** Default destructor */ + ~CLReductionOperationKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 + * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX + * @param[in] width (Optional) In case of x-axis we also need to provide the width of the input image. + */ + void configure(const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width = 0); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 + * @param[in] op Reduction operation to perform. 
Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX + * @param[in] width (Optional) In case of x-axis we also need to provide the width of the input image. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width = 0); + + /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperationKernel. + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. + * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input. + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 + * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX + * @param[in] width (Optional) In case of x-axis we also need to provide the width of the input image. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, unsigned int width = 0); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + unsigned int _reduction_axis; + ReductionOperation _op; + BorderSize _border_size; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H */ diff --git a/src/core/CL/kernels/CLRemapKernel.cpp b/src/core/CL/kernels/CLRemapKernel.cpp index 8d3f41b35f..0ebeefcc74 100644 --- a/src/core/CL/kernels/CLRemapKernel.cpp +++ b/src/core/CL/kernels/CLRemapKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLRemapKernel.h" +#include "src/core/CL/kernels/CLRemapKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLRemapKernel.h b/src/core/CL/kernels/CLRemapKernel.h new file mode 100644 index 0000000000..8efcf091ed --- /dev/null +++ b/src/core/CL/kernels/CLRemapKernel.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
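As a companion to the CLReductionOperationKernel documentation above, a small sketch (not part of the patch, helper name hypothetical): summing along the x-axis of a [64, 8] F32 tensor collapses dimension 0 to size 1, and the optional width argument is supplied because this is an x-axis reduction.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLReductionOperationKernel.h"

using namespace arm_compute;

// Hypothetical helper: SUM over axis 0 of a [64, 8] tensor, giving a [1, 8] result.
Status check_sum_axis0()
{
    const TensorInfo src(TensorShape(64U, 8U), 1, DataType::F32);
    const TensorInfo dst(TensorShape(1U, 8U), 1, DataType::F32);
    // The width (64) is only meaningful for x-axis reductions, as documented above.
    return CLReductionOperationKernel::validate(&src, &dst, 0, ReductionOperation::SUM, 64U);
}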
+ */ +#ifndef ARM_COMPUTE_CLREMAPKERNEL_H +#define ARM_COMPUTE_CLREMAPKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform a remap on a tensor */ +class CLRemapKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLRemapKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLRemapKernel(const CLRemapKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLRemapKernel &operator=(const CLRemapKernel &) = delete; + /** Allow instances of this class to be moved */ + CLRemapKernel(CLRemapKernel &&) = default; + /** Allow instances of this class to be moved */ + CLRemapKernel &operator=(CLRemapKernel &&) = default; + /** Initialize the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] map_x Map for X coordinates. Data types supported: F32. + * @param[in] map_y Map for Y coordinates. Data types supported: F32. + * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. + * @param[in] policy The interpolation type. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined); + /** Initialize the kernel's input, output and border mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] map_x Map for X coordinates. Data types supported: F32. + * @param[in] map_y Map for Y coordinates. Data types supported: F32. + * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. + * @param[in] policy The interpolation type. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + const ICLTensor *_map_x; + const ICLTensor *_map_y; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLREMAPKERNEL_H */ diff --git a/src/core/CL/kernels/CLReorgLayerKernel.cpp b/src/core/CL/kernels/CLReorgLayerKernel.cpp index ade7761b91..662c790ca2 100644 --- a/src/core/CL/kernels/CLReorgLayerKernel.cpp +++ b/src/core/CL/kernels/CLReorgLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLReorgLayerKernel.h" +#include "src/core/CL/kernels/CLReorgLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLReorgLayerKernel.h b/src/core/CL/kernels/CLReorgLayerKernel.h new file mode 100644 index 0000000000..455a6170c6 --- /dev/null +++ b/src/core/CL/kernels/CLReorgLayerKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLREORGLAYERKERNEL_H +#define ARM_COMPUTE_CLREORGLAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform a reorg layer */ +class CLReorgLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLReorgLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLReorgLayerKernel(const CLReorgLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLReorgLayerKernel &operator=(const CLReorgLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLReorgLayerKernel(CLReorgLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLReorgLayerKernel &operator=(CLReorgLayerKernel &&) = default; + /** Initialize the kernel's input, output. + * + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Destination tensor with tensor shape: + * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has + * the same number of input elements. Data types supported: same as @p input. + * @param[in] stride Stride value to use for reorganizing the values in the output tensor. + * It defines the spatial distance between 2 consecutive pixels in the x and y direction + */ + void configure(const ICLTensor *input, ICLTensor *output, int32_t stride); + /** Initialize the kernel's input, output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Destination tensor with tensor shape: + * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has + * the same number of input elements. 
Data types supported: same as @p input. + * @param[in] stride Stride value to use for reorganizing the values in the output tensor. + * It defines the spatial distance between 2 consecutive pixels in the x and y direction + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t stride); + /** Static function to check if given info will lead to a valid configuration of @ref CLReorgLayerKernel + * + * @param[in] input Source tensor. Data types supported: All. + * @param[in] output Destination tensor with tensor shape: + * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has + * the same number of input elements. Data types supported: same as @p input. + * @param[in] stride Stride value to use for reorganizing the values in the output tensor. + * It defines the spatial distance between 2 consecutive pixels in the x and y direction + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t stride); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLREORGLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLReshapeLayerKernel.cpp b/src/core/CL/kernels/CLReshapeLayerKernel.cpp index b14013bc34..58d7843624 100644 --- a/src/core/CL/kernels/CLReshapeLayerKernel.cpp +++ b/src/core/CL/kernels/CLReshapeLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h" +#include "src/core/CL/kernels/CLReshapeLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLReshapeLayerKernel.h b/src/core/CL/kernels/CLReshapeLayerKernel.h new file mode 100644 index 0000000000..902c44649b --- /dev/null +++ b/src/core/CL/kernels/CLReshapeLayerKernel.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef ARM_COMPUTE_CLRESHAPELAYERKERNEL_H +#define ARM_COMPUTE_CLRESHAPELAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to perform tensor reshaping */ +class CLReshapeLayerKernel : public ICLKernel +{ +public: + /** Set the input and output of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor info. Data type supported: All. + * @param[out] output Destination tensor info. Data type supported: Same as @p input + */ + void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output); + + /** Static function to check if given info will lead to a valid configuration of @ref CLReshapeLayerKernel + * + * @param[in] input Source tensor info. Data type supported: All + * @param[in] output Destination tensor info. Data type supported: Same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLRESHAPELAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLReverseKernel.cpp b/src/core/CL/kernels/CLReverseKernel.cpp index f8240984d1..9a876258e9 100644 --- a/src/core/CL/kernels/CLReverseKernel.cpp +++ b/src/core/CL/kernels/CLReverseKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLReverseKernel.h" +#include "src/core/CL/kernels/CLReverseKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLReverseKernel.h b/src/core/CL/kernels/CLReverseKernel.h new file mode 100644 index 0000000000..4a21e4f802 --- /dev/null +++ b/src/core/CL/kernels/CLReverseKernel.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
*/ +#ifndef ARM_COMPUTE_CLREVERSEKERNEL_H +#define ARM_COMPUTE_CLREVERSEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the reverse kernel */ +class CLReverseKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLReverseKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLReverseKernel(const CLReverseKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLReverseKernel &operator=(const CLReverseKernel &) = delete; + /** Allow instances of this class to be moved */ + CLReverseKernel(CLReverseKernel &&) = default; + /** Allow instances of this class to be moved */ + CLReverseKernel &operator=(CLReverseKernel &&) = default; + /** Default destructor */ + ~CLReverseKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: Same as @p input + * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 + */ + void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *axis); + /** Initialise the kernel's inputs and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: Same as @p input + * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *axis); + + /** Static function to check if given info will lead to a valid configuration of @ref CLReverseKernel + * + * @param[in] input Input tensor info. Data types supported: All. + * @param[in] output Output tensor info. Data type supported: Same as @p input + * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32 + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +public: + const ICLTensor *_input; + ICLTensor *_output; + const ICLTensor *_axis; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLREVERSEKERNEL_H */ diff --git a/src/core/CL/kernels/CLScaleKernel.cpp b/src/core/CL/kernels/CLScaleKernel.cpp index 8233f210b4..5a7d5830fd 100644 --- a/src/core/CL/kernels/CLScaleKernel.cpp +++ b/src/core/CL/kernels/CLScaleKernel.cpp @@ -21,11 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
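The reverse kernel above follows the configure()/validate() pattern shared by all of the moved headers. The sketch below is not part of this patch; it is a minimal, hypothetical usage flow that assumes the usual runtime helpers (CLScheduler, CLTensor) and made-up shapes, data types and axis contents.

// Hypothetical usage sketch (not from this patch): reverse a 3D tensor with CLReverseKernel.
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLReverseKernel.h"

using namespace arm_compute;

void reverse_sketch()
{
    CLScheduler::get().default_init(); // set up the CL context/queue once per process

    CLTensor src, dst, axis;
    src.allocator()->init(TensorInfo(TensorShape(8U, 4U, 2U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(8U, 4U, 2U), 1, DataType::F32));
    axis.allocator()->init(TensorInfo(TensorShape(1U), 1, DataType::U32)); // indices of dimensions to reverse

    // validate() only inspects metadata and returns a Status.
    const Status status = CLReverseKernel::validate(src.info(), dst.info(), axis.info());
    if(status.error_code() != ErrorCode::OK)
    {
        return;
    }

    src.allocator()->allocate();
    dst.allocator()->allocate();
    axis.allocator()->allocate(); // fill with e.g. {0} via map()/unmap() before running

    CLReverseKernel reverse;
    reverse.configure(&src, &dst, &axis);
    CLScheduler::get().enqueue(reverse); // executes over the kernel's full window
}

The CLCompileContext overloads behave the same way but let the caller supply the context used to build the OpenCL program.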
*/ -#include "arm_compute/core/CL/kernels/CLScaleKernel.h" +#include "src/core/CL/kernels/CLScaleKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Error.h" @@ -33,6 +32,7 @@ #include "arm_compute/core/TensorInfo.h" #include "src/core/AccessWindowStatic.h" #include "src/core/CL/CLValidate.h" +#include "src/core/CL/ICLKernel.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" diff --git a/src/core/CL/kernels/CLScaleKernel.h b/src/core/CL/kernels/CLScaleKernel.h new file mode 100644 index 0000000000..a72e3938d9 --- /dev/null +++ b/src/core/CL/kernels/CLScaleKernel.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLSCALEKERNEL_H +#define ARM_COMPUTE_CLSCALEKERNEL_H + +#include "arm_compute/core/KernelDescriptors.h" +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the scale kernel */ +class CLScaleKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's inputs, output and interpolation policy + * + * @param[in] input Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32 + * @param[out] output Destination tensor. Data types supported: Same as @p input + * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] info @ref ScaleKernelInfo Kernel descriptor to be used to configure. + */ + void configure(const ICLTensor *input, ICLTensor *output, const ScaleKernelInfo &info); + /** Initialise the kernel's inputs, output and interpolation policy + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32 + * @param[out] output Destination tensor. Data types supported: Same as @p input + * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] info @ref ScaleKernelInfo Kernel descriptor to be used to configure. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ScaleKernelInfo &info); + + /** Static function to check if given info will lead to a valid configuration of @ref CLScaleKernel + * + * @param[in] input Source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32 + * @param[in] output Destination tensor info. Data types supported: Same as @p input + * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] info @ref ScaleKernelInfo Kernel descriptor to be used to validate + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ScaleKernelInfo &info); + /** Input tensor accessor. + * + * @return Pointer to input tensor. + */ + const ICLTensor *input() const; + /** Output tensor accessor. + * + * @return Pointer to output tensor. + */ + const ICLTensor *output() const; + + // Inherited methods overridden: + BorderSize border_size() const override; + void run(const Window &window, cl::CommandQueue &queue) override; + + // Getter for interpolation policy + InterpolationPolicy get_interpolation_policy() const + { + return _interpolation_policy; + } + +private: + InterpolationPolicy _interpolation_policy = InterpolationPolicy::BILINEAR; + DataLayout _data_layout = DataLayout::UNKNOWN; + bool _align_corners = false; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLSCALEKERNEL_H */ diff --git a/src/core/CL/kernels/CLScharr3x3Kernel.cpp b/src/core/CL/kernels/CLScharr3x3Kernel.cpp index 1e33af3047..7ceddc9626 100644 --- a/src/core/CL/kernels/CLScharr3x3Kernel.cpp +++ b/src/core/CL/kernels/CLScharr3x3Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLScharr3x3Kernel.h" +#include "src/core/CL/kernels/CLScharr3x3Kernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLScharr3x3Kernel.h b/src/core/CL/kernels/CLScharr3x3Kernel.h new file mode 100644 index 0000000000..a670da5b6f --- /dev/null +++ b/src/core/CL/kernels/CLScharr3x3Kernel.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLSCHARR3X3KERNEL_H +#define ARM_COMPUTE_CLSCHARR3X3KERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run a 3x3 Scharr filter on a tensor. + * + * @f[ + * \mathbf{G}_x=\begin{vmatrix} + * -3 & 0 & +3\\ + * -10& 0 & +10\\ + * -3 & 0 & +3 + * \end{vmatrix} + * @f] + * @f[ + * \mathbf{G}_y=\begin{vmatrix} + * -3 & -10 & -3\\ + * 0 & 0 & 0\\ + * +3 & +10 & +3 + * \end{vmatrix} + * @f] + */ +class CLScharr3x3Kernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ + CLScharr3x3Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLScharr3x3Kernel(const CLScharr3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLScharr3x3Kernel &operator=(const CLScharr3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + CLScharr3x3Kernel(CLScharr3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + CLScharr3x3Kernel &operator=(CLScharr3x3Kernel &&) = default; + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + bool _run_scharr_x; /**< Do we need to run Scharr X ? */ + bool _run_scharr_y; /**< Do we need to run Scharr Y ? */ + const ICLTensor *_input; /**< Input image */ + ICLTensor *_output_x; /**< Output image for scharr X */ + ICLTensor *_output_y; /**< Output image for scharr Y */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLSCHARR3X3KERNEL_H */ diff --git a/src/core/CL/kernels/CLSelectKernel.cpp b/src/core/CL/kernels/CLSelectKernel.cpp index d9a1044e1f..53e5414c88 100644 --- a/src/core/CL/kernels/CLSelectKernel.cpp +++ b/src/core/CL/kernels/CLSelectKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLSelectKernel.h" +#include "src/core/CL/kernels/CLSelectKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLSelectKernel.h b/src/core/CL/kernels/CLSelectKernel.h new file mode 100644 index 0000000000..93ae27f444 --- /dev/null +++ b/src/core/CL/kernels/CLSelectKernel.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLSELECTKERNEL_H +#define ARM_COMPUTE_CLSELECTKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ICLTensor; + +/** OpenCL interface for executing the select kernel + * + * Select is computed by: + * @f[ output(i) = condition(i) ? x(i) : y(i) @f] + **/ +class CLSelectKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLSelectKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSelectKernel(const CLSelectKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSelectKernel &operator=(const CLSelectKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSelectKernel(CLSelectKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSelectKernel &operator=(CLSelectKernel &&) = default; + /** Default destructor */ + ~CLSelectKernel() = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] c Condition input tensor. Data types supported: U8. + * @param[in] x First input tensor. Data types supported: All. + * @param[in] y Second input tensor. Data types supported: Same as @p x + * @param[out] output Output tensor. Data types supported: Same as @p x. + */ + void configure(const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output); + /** Initialise the kernel's inputs and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] c Condition input tensor. Data types supported: U8. + * @param[in] x First input tensor. Data types supported: All. + * @param[in] y Second input tensor. Data types supported: Same as @p x + * @param[out] output Output tensor. Data types supported: Same as @p x.
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLSelectKernel + * + * @param[in] c Condition input tensor. Data types supported: U8. + * @param[in] x First input tensor. Data types supported: All. + * @param[in] y Second input tensor. Data types supported: Same as @p x + * @param[in] output Output tensor. Data types supported: Same as @p x. + * + * @return a status + */ + static Status validate(const ITensorInfo *c, const ITensorInfo *x, const ITensorInfo *y, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_c; /**< Condition tensor */ + const ICLTensor *_x; /**< Source tensor 1 */ + const ICLTensor *_y; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ + bool _has_same_rank; /**< Flag that indicates if condition tensor and other inputs have the same rank */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLWHEREKERNEL_H */ diff --git a/src/core/CL/kernels/CLSobel3x3Kernel.cpp b/src/core/CL/kernels/CLSobel3x3Kernel.cpp index 89e5207c44..a87677a38f 100644 --- a/src/core/CL/kernels/CLSobel3x3Kernel.cpp +++ b/src/core/CL/kernels/CLSobel3x3Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLSobel3x3Kernel.h" +#include "src/core/CL/kernels/CLSobel3x3Kernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLSobel3x3Kernel.h b/src/core/CL/kernels/CLSobel3x3Kernel.h new file mode 100644 index 0000000000..fed8068762 --- /dev/null +++ b/src/core/CL/kernels/CLSobel3x3Kernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLSOBEL3X3KERNEL_H +#define ARM_COMPUTE_CLSOBEL3X3KERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run a 3x3 Sobel filter on a tensor. */ +class CLSobel3x3Kernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. 
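Not part of the patch: a hypothetical sketch of driving the select kernel declared above, following the element-wise semantics from its class comment (output(i) = condition(i) ? x(i) : y(i)). The tensor names are invented and allocation is assumed to have been done as in the earlier sketch.

// Hypothetical usage sketch (not from this patch): element-wise select.
#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLSelectKernel.h"

using namespace arm_compute;

void select_sketch(CLTensor &c, CLTensor &x, CLTensor &y, CLTensor &dst)
{
    // c is a U8 condition tensor; x, y and dst share data type and shape.
    // The condition may match the rank of x/y or use a reduced rank; the
    // kernel records which case applies in its _has_same_rank flag.
    if(CLSelectKernel::validate(c.info(), x.info(), y.info(), dst.info()).error_code() == ErrorCode::OK)
    {
        CLSelectKernel select;
        select.configure(&c, &x, &y, &dst);
        CLScheduler::get().enqueue(select);
    }
}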
*/ + CLSobel3x3Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSobel3x3Kernel(const CLSobel3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSobel3x3Kernel &operator=(const CLSobel3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + CLSobel3x3Kernel(CLSobel3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + CLSobel3x3Kernel &operator=(CLSobel3x3Kernel &&) = default; + /** Default destructor */ + ~CLSobel3x3Kernel() = default; + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; /**< Input tensor */ + ICLTensor *_output_x; /**< Output tensor for Sobel X */ + ICLTensor *_output_y; /**< Output tensor for Sobel Y */ + bool _run_sobel_x; /**< Do we need to run Sobel X ? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y ? */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLSOBEL3X3KERNEL_H */ diff --git a/src/core/CL/kernels/CLSobel5x5Kernel.cpp b/src/core/CL/kernels/CLSobel5x5Kernel.cpp index 3e765e47fb..c450becd1d 100644 --- a/src/core/CL/kernels/CLSobel5x5Kernel.cpp +++ b/src/core/CL/kernels/CLSobel5x5Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h" +#include "src/core/CL/kernels/CLSobel5x5Kernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLSobel5x5Kernel.h b/src/core/CL/kernels/CLSobel5x5Kernel.h new file mode 100644 index 0000000000..a163ac932a --- /dev/null +++ b/src/core/CL/kernels/CLSobel5x5Kernel.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
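Not part of the patch: the Scharr/Sobel kernels above take optional gradient outputs, so "at least one of output_x or output_y must be set" translates into passing nullptr for the unwanted one. A hypothetical sketch with a U8 source and an S16 X-gradient; all other details are assumed.

// Hypothetical usage sketch (not from this patch): horizontal 3x3 Sobel only.
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLSobel3x3Kernel.h"

using namespace arm_compute;

void sobel_x_sketch(CLTensor &src_u8, CLTensor &grad_x_s16)
{
    CLSobel3x3Kernel sobel;
    // border_undefined = true: border pixels are left untouched, so no fill
    // border kernel has to run first; border_size() reports the margin read.
    sobel.configure(&src_u8, &grad_x_s16, /* output_y */ nullptr, /* border_undefined */ true);
    CLScheduler::get().enqueue(sobel);
}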
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLSOBEL5X5KERNEL_H +#define ARM_COMPUTE_CLSOBEL5X5KERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor. */ +class CLSobel5x5HorKernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ + CLSobel5x5HorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSobel5x5HorKernel(const CLSobel5x5HorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSobel5x5HorKernel &operator=(const CLSobel5x5HorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSobel5x5HorKernel(CLSobel5x5HorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSobel5x5HorKernel &operator=(CLSobel5x5HorKernel &&) = default; + /** Default destructor */ + ~CLSobel5x5HorKernel() = default; + + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; /**< Input tensor */ + ICLTensor *_output_x; /**< X output of horizontal pass */ + ICLTensor *_output_y; /**< Y output of horizontal pass */ + bool _run_sobel_x; /**< Do we need to run Sobel X ? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y ? */ + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel to run the vertical pass of 5x5 Sobel filter on a tensor. */ +class CLSobel5x5VertKernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ + CLSobel5x5VertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSobel5x5VertKernel(const CLSobel5x5VertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSobel5x5VertKernel &operator=(const CLSobel5x5VertKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSobel5x5VertKernel(CLSobel5x5VertKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSobel5x5VertKernel &operator=(CLSobel5x5VertKernel &&) = default; + /** Default destructor */ + ~CLSobel5x5VertKernel() = default; + + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set and the corresponding input. + * + * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16. + * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set and the corresponding input. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16. + * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */ + const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */ + ICLTensor *_output_x; /**< X output of sobel */ + ICLTensor *_output_y; /**< Y output of sobel */ + bool _run_sobel_x; /**< Do we need to run sobel X? */ + bool _run_sobel_y; /**< Do we need to run sobel Y? */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLSOBEL5X5KERNEL_H */ diff --git a/src/core/CL/kernels/CLSobel7x7Kernel.cpp b/src/core/CL/kernels/CLSobel7x7Kernel.cpp index 37ceaba502..1cfa74f7b3 100644 --- a/src/core/CL/kernels/CLSobel7x7Kernel.cpp +++ b/src/core/CL/kernels/CLSobel7x7Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h" +#include "src/core/CL/kernels/CLSobel7x7Kernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLSobel7x7Kernel.h b/src/core/CL/kernels/CLSobel7x7Kernel.h new file mode 100644 index 0000000000..c85f0aedf9 --- /dev/null +++ b/src/core/CL/kernels/CLSobel7x7Kernel.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLSOBEL7X7KERNEL_H +#define ARM_COMPUTE_CLSOBEL7X7KERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor. */ +class CLSobel7x7HorKernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. 
*/ + CLSobel7x7HorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSobel7x7HorKernel(const CLSobel7x7HorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSobel7x7HorKernel &operator=(const CLSobel7x7HorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSobel7x7HorKernel(CLSobel7x7HorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSobel7x7HorKernel &operator=(CLSobel7x7HorKernel &&) = default; + /** Default destructor */ + ~CLSobel7x7HorKernel() = default; + + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; /**< Input tensor */ + ICLTensor *_output_x; /**< X output of horizontal pass */ + ICLTensor *_output_y; /**< Y output of horizontal pass */ + bool _run_sobel_x; /**< Do we need to run Sobel X ? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y ? */ + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel to run the vertical pass of 7x7 Sobel filter on a tensor. */ +class CLSobel7x7VertKernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ + CLSobel7x7VertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSobel7x7VertKernel(const CLSobel7x7VertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSobel7x7VertKernel &operator=(const CLSobel7x7VertKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSobel7x7VertKernel(CLSobel7x7VertKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSobel7x7VertKernel &operator=(CLSobel7x7VertKernel &&) = default; + /** Default destructor */ + ~CLSobel7x7VertKernel() = default; + + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set and the corresponding input. 
+ * + * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32. + * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set and the corresponding input. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32. + * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */ + const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */ + ICLTensor *_output_x; /**< X output of sobel */ + ICLTensor *_output_y; /**< Y output of sobel */ + bool _run_sobel_x; /**< Do we need to run sobel X? */ + bool _run_sobel_y; /**< Do we need to run sobel Y? */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLSOBEL7X7KERNEL_H */ diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp index 5c0acda41a..d9f498c522 100644 --- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h" +#include "src/core/CL/kernels/CLSoftmaxLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.h b/src/core/CL/kernels/CLSoftmaxLayerKernel.h new file mode 100644 index 0000000000..29e0f63e46 --- /dev/null +++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.h @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H +#define ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H + +#include "arm_compute/core/KernelDescriptors.h" +#include "src/core/CL/ICLSimple3DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for max, shifting, exponentiating and summing the logits */ +class CLLogits1DMaxShiftExpSumKernel : public ICLKernel +{ +public: + /** Info for whether a parallel reduction will be run and the vector size of the execution. */ + using ParallelReductionInfo = std::tuple<bool, unsigned int>; + +public: + /** Default constructor */ + CLLogits1DMaxShiftExpSumKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLogits1DMaxShiftExpSumKernel(const CLLogits1DMaxShiftExpSumKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLogits1DMaxShiftExpSumKernel &operator=(const CLLogits1DMaxShiftExpSumKernel &) = delete; + /** Allow instances of this class to be moved */ + CLLogits1DMaxShiftExpSumKernel(CLLogits1DMaxShiftExpSumKernel &&) = default; + /** Allow instances of this class to be moved */ + CLLogits1DMaxShiftExpSumKernel &operator=(CLLogits1DMaxShiftExpSumKernel &&) = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in,out] max Max values tensor. Data types supported: same as @p input + * @param[out] output Destination tensor. Data types supported: same as @p input + * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input + * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. + */ + void configure(const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in,out] max Max values tensor. Data types supported: same as @p input + * @param[out] output Destination tensor. Data types supported: same as @p input + * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input + * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info); + /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxShiftExpSumKernel + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in] max Max values tensor. Data types supported: same as @p input + * @param[in] output Destination tensor. Data types supported: same as @p input + * @param[in] sum Sum of 1D logits tensor. Data types supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum); + /** Checks if the given size is eligible for parallel reduction + * + * @note Serial reduction is launched for width < (_grid_size * _serial_vector_size). + * @note Parallel reduction is launched for width >= (_grid_size * _serial_vector_size) and vector_size is forced to 4. + * + * @param[in] size Size to check + * + * @return A two-element tuple where the first element is a boolean specifying if a parallel reduction will be run, + * while the second element is the vector size of the execution. + */ + static ParallelReductionInfo is_parallel_reduction(size_t size); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_max; + ICLTensor *_output; + ICLTensor *_sum; + +private: + static const unsigned int _grid_size; + static const unsigned int _serial_vector_size; + static const unsigned int _parallel_vector_size; +}; +/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */ +class CLLogits1DNormKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLLogits1DNormKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLogits1DNormKernel(const CLLogits1DNormKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLogits1DNormKernel &operator=(const CLLogits1DNormKernel &) = delete; + /** Allow instances of this class to be moved */ + CLLogits1DNormKernel(CLLogits1DNormKernel &&) = default; + /** Allow instances of this class to be moved */ + CLLogits1DNormKernel &operator=(CLLogits1DNormKernel &&) = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported. + * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input + * @param[out] output Destination tensor. Data types supported: QASYMM8/QASYMM8_SIGNED for S32 @p input, or same as @p input + * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. + */ + void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info); + /** Set the input and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported. + * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. 
Data types supported: same as @p input + * @param[out] output Destination tensor. Data types supported: QASYMM8/QASYMM8_SIGNED for S32 @p input, or same as @p input + * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info); + /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DNormKernel + * + * @param[in] input Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported. + * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input + * @param[in] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input + * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, const SoftmaxKernelInfo &info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_sum; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp index c6f70c3c09..91b889a10a 100644 --- a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp +++ b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h" +#include "src/core/CL/kernels/CLSpaceToBatchLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.h b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.h new file mode 100644 index 0000000000..4819c80fce --- /dev/null +++ b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
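Not part of the patch: in the library the two softmax kernels above are normally orchestrated by CLSoftmaxLayer, which also owns the intermediate max/tmp/sum tensors. The hypothetical sketch below only illustrates how the two passes chain; the intermediate tensors and the default-constructed SoftmaxKernelInfo (beta = 1, standard non-log softmax) are assumptions.

// Hypothetical usage sketch (not from this patch): softmax as two chained passes.
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLSoftmaxLayerKernel.h"

using namespace arm_compute;

void softmax_sketch(CLTensor &src, CLTensor &max, CLTensor &tmp, CLTensor &sum, CLTensor &dst)
{
    SoftmaxKernelInfo info{}; // defaults assumed here

    // Pass 1: per-row maximum, shifted exponentials and their sum.
    CLLogits1DMaxShiftExpSumKernel max_shift_exp_sum;
    max_shift_exp_sum.configure(&src, &max, &tmp, &sum, info);

    // Pass 2: normalise each exponentiated logit by the row sum.
    CLLogits1DNormKernel norm;
    norm.configure(&tmp, &sum, &dst, info);

    CLScheduler::get().enqueue(max_shift_exp_sum, false); // defer the flush
    CLScheduler::get().enqueue(norm);
}

Whether the first pass reduces serially or in parallel depends on the row width; is_parallel_reduction(size) exposes that decision, as documented above.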
+ */ +#ifndef ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H +#define ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the space to batch kernel */ +class CLSpaceToBatchLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLSpaceToBatchLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSpaceToBatchLayerKernel(const CLSpaceToBatchLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSpaceToBatchLayerKernel &operator=(const CLSpaceToBatchLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSpaceToBatchLayerKernel(CLSpaceToBatchLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSpaceToBatchLayerKernel &operator=(CLSpaceToBatchLayerKernel &&) = default; + /** Default destructor */ + ~CLSpaceToBatchLayerKernel() = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 + * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output); + /** Initialise the kernel's inputs and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 + * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output); + /** Initialise the kernel's input and output. (Static block shape and paddings) + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. + * @param[in] padding_left The left padding of the output tensor. + * @param[in] padding_right The right padding of the output tensor. + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output); + /** Initialise the kernel's input and output. (Static block shape and paddings) + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. + * @param[in] padding_left The left padding of the output tensor. + * @param[in] padding_right The right padding of the output tensor. + * @param[out] output Tensor output. 
Data types supported: same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, + ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayerKernel + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 + * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 + * @param[in] output Tensor output. Data types supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayerKernel (Static block shape and paddings) + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. + * @param[in] padding_left The left padding of the output tensor. + * @param[in] padding_right The right padding of the output tensor. + * @param[in] output Tensor output. Data types supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Source tensor */ + const ICLTensor *_block_shape; /**< Block shape tensor */ + const ICLTensor *_paddings; /**< Paddings tensor */ + ICLTensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp index 2d46aade34..1c648e0944 100644 --- a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp +++ b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h" +#include "src/core/CL/kernels/CLSpaceToDepthLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.h b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.h new file mode 100644 index 0000000000..bb1ac5f9a6 --- /dev/null +++ b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
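To make the static-shape overload above concrete, a small compile-time sketch follows; only the validate() signature comes from the header, the shapes are illustrative. With a 2x2 block and no padding, the spatial dimensions shrink by the block size and the batch dimension grows by block_shape_x * block_shape_y.

#include "arm_compute/core/TensorInfo.h"
#include "src/core/CL/kernels/CLSpaceToBatchLayerKernel.h"

using namespace arm_compute;

Status check_space_to_batch()
{
    // Shapes in the default (W, H, C, N) order: an 8x8 image with 3 channels, batch of 1.
    const TensorInfo input(TensorShape(8U, 8U, 3U, 1U), 1, DataType::F32);
    // 2x2 block, zero padding: 8x8 spatial -> 4x4, batch 1 -> 1 * 2 * 2 = 4.
    const TensorInfo output(TensorShape(4U, 4U, 3U, 4U), 1, DataType::F32);

    return CLSpaceToBatchLayerKernel::validate(&input, /* block_shape_x */ 2, /* block_shape_y */ 2,
                                               Size2D(0, 0), Size2D(0, 0), &output);
}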
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H +#define ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the space to depth kernel */ +class CLSpaceToDepthLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLSpaceToDepthLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSpaceToDepthLayerKernel(const CLSpaceToDepthLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSpaceToDepthLayerKernel &operator=(const CLSpaceToDepthLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSpaceToDepthLayerKernel(CLSpaceToDepthLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSpaceToDepthLayerKernel &operator=(CLSpaceToDepthLayerKernel &&) = default; + /** Default destructor */ + ~CLSpaceToDepthLayerKernel() = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[out] output Tensor output. Data types supported: same as @p input + * @param[in] block_shape Block shape value. + */ + void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape); + /** Initialise the kernel's inputs and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[out] output Tensor output. Data types supported: same as @p input + * @param[in] block_shape Block shape value. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape); + /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToDepthLayerKernel. + * + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. + * @param[in] output Tensor output info. Data types supported: same as @p input + * @param[in] block_shape Block shape value. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Source tensor */ + ICLTensor *_output; /**< Destination tensor */ + int32_t _block_shape; /**< Block shape */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLStackLayerKernel.cpp b/src/core/CL/kernels/CLStackLayerKernel.cpp index 5055065779..9bdcc8dc3f 100644 --- a/src/core/CL/kernels/CLStackLayerKernel.cpp +++ b/src/core/CL/kernels/CLStackLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLStackLayerKernel.h" +#include "src/core/CL/kernels/CLStackLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLStackLayerKernel.h b/src/core/CL/kernels/CLStackLayerKernel.h new file mode 100644 index 0000000000..2865127a90 --- /dev/null +++ b/src/core/CL/kernels/CLStackLayerKernel.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
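The same kind of compile-time check for the space-to-depth kernel above; again the shapes are illustrative and only the validate() signature is taken from the header.

#include "arm_compute/core/TensorInfo.h"
#include "src/core/CL/kernels/CLSpaceToDepthLayerKernel.h"

using namespace arm_compute;

Status check_space_to_depth()
{
    const TensorInfo input(TensorShape(8U, 8U, 3U, 1U), 1, DataType::F32);
    // block_shape = 2: spatial dimensions halve, channels grow by 2 * 2.
    const TensorInfo output(TensorShape(4U, 4U, 12U, 1U), 1, DataType::F32);

    return CLSpaceToDepthLayerKernel::validate(&input, &output, /* block_shape */ 2);
}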
+ */ + +#ifndef ARM_COMPUTE_CLSTACKLAYERKERNEL_H +#define ARM_COMPUTE_CLSTACKLAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to stacks a rank-R tensor into one with rank-(R+1) along the axis dimension.*/ +class CLStackLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLStackLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLStackLayerKernel(const CLStackLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLStackLayerKernel &operator=(const CLStackLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLStackLayerKernel(CLStackLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLStackLayerKernel &operator=(CLStackLayerKernel &&) = default; + /** Default destructor */ + ~CLStackLayerKernel() = default; + /** Initialise the kernel's inputs and output + * + * @note Supported input tensor rank: up to 4 + * + * @param[in] input Input tensor. Data types supported: All. + * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. + * @param[in] idx_input Index of the input tensor in the list of tensors to stack. + * All tensors in the list must have the same shape + * @param[in] num_tensors Number of tensors to stack + * @param[out] output Output tensor. Data types supported: Same as @p input. + * + */ + void configure(const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output); + /** Initialise the kernel's inputs and output + * + * @note Supported input tensor rank: up to 4 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. + * @param[in] idx_input Index of the input tensor in the list of tensors to stack. + * All tensors in the list must have the same shape + * @param[in] num_tensors Number of tensors to stack + * @param[out] output Output tensor. Data types supported: Same as @p input. + * + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLStackLayerKernel + * + * @note Supported input tensor rank: up to 4 + * + * @param[in] input Input tensor info. Data types supported: All. + * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. + * @param[in] idx_input Index of the input tensor in the list of tensors to stack + * All tensors in the list must have the same shape + * @param[in] num_tensors Number of tensors to stack + * @param[in] output Output tensor info. Data types supported: Same as @p input. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLSTACKLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLStridedSliceKernel.cpp b/src/core/CL/kernels/CLStridedSliceKernel.cpp index b632e05d84..c87fcb9765 100644 --- a/src/core/CL/kernels/CLStridedSliceKernel.cpp +++ b/src/core/CL/kernels/CLStridedSliceKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h" +#include "src/core/CL/kernels/CLStridedSliceKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/utils/helpers/tensor_transform.h" diff --git a/src/core/CL/kernels/CLStridedSliceKernel.h b/src/core/CL/kernels/CLStridedSliceKernel.h new file mode 100644 index 0000000000..599cf34c39 --- /dev/null +++ b/src/core/CL/kernels/CLStridedSliceKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H +#define ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +#include + +namespace arm_compute +{ +/** Interface for the kernel to perform tensor strided slicing */ +class CLStridedSliceKernel : public ICLKernel +{ +public: + /** Configure kernel + * + * @note Supported tensor rank: up to 4 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor info. Data type supported: All. + * @param[out] output Destination tensor info. Data type supported: Same as @p input + * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). 
+ * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. + * A slice of size 1 starting from starts[i] in the dimension must be preserved. + */ + void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, + const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, + int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); + + /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel + * + * @note Supported tensor rank: up to 4 + * + * @param[in] input Source tensor. Data type supported: All. + * @param[in] output Destination tensor. Data type supported: Same as @p input + * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. + * A slice of size 1 starting from starts[i] in the dimension must be preserved. + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, + int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H */ diff --git a/src/core/CL/kernels/CLTableLookupKernel.cpp b/src/core/CL/kernels/CLTableLookupKernel.cpp index 3b8ca60ab1..b82f4c9889 100644 --- a/src/core/CL/kernels/CLTableLookupKernel.cpp +++ b/src/core/CL/kernels/CLTableLookupKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h" +#include "src/core/CL/kernels/CLTableLookupKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLLut.h" diff --git a/src/core/CL/kernels/CLTableLookupKernel.h b/src/core/CL/kernels/CLTableLookupKernel.h new file mode 100644 index 0000000000..c8d15cbee2 --- /dev/null +++ b/src/core/CL/kernels/CLTableLookupKernel.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
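A worked example may help with the mask semantics described above. The sketch below slices elements 2, 4 and 6 along x and keeps the full y range, with all mask bits cleared; the shapes and values are illustrative only, and Coordinates/BiStrides are assumed to be constructible from per-dimension values as used elsewhere in the library.

#include "arm_compute/core/TensorInfo.h"
#include "src/core/CL/kernels/CLStridedSliceKernel.h"

using namespace arm_compute;

Status check_strided_slice()
{
    const TensorInfo input(TensorShape(10U, 4U), 1, DataType::F32);
    // x: start 2, end 8, stride 2 -> 3 elements; y: full range of 4.
    const TensorInfo output(TensorShape(3U, 4U), 1, DataType::F32);

    const Coordinates starts(2, 0);
    const Coordinates ends(8, 4);
    const BiStrides   strides(2, 1);

    // All masks zero: starts/ends are honoured and no dimension is shrunk.
    // Setting bit i of begin_mask/end_mask would make the slice span the fullest possible
    // range in dimension i regardless of starts[i]/ends[i].
    return CLStridedSliceKernel::validate(&input, &output, starts, ends, strides,
                                          /* begin_mask */ 0, /* end_mask */ 0, /* shrink_axis_mask */ 0);
}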
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLTABLELOOKUPKERNEL_H +#define ARM_COMPUTE_CLTABLELOOKUPKERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; +class ICLLut; + +/** Interface for the kernel to perform table lookup calculations. */ +class CLTableLookupKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input, lut and output. + * + * @param[in] input An input tensor. Data types supported: U8, S16. + * @param[in] lut The input LUT. Data types supported: U8, S16. + * @param[out] output The output tensor. Data types supported: U8, S16. + */ + void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output); + /** Initialise the kernel's input, lut and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8, S16. + * @param[in] lut The input LUT. Data types supported: U8, S16. + * @param[out] output The output tensor. Data types supported: U8, S16. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLLut *lut, ICLTensor *output); +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLTABLELOOKUPKERNEL_H */ diff --git a/src/core/CL/kernels/CLThresholdKernel.cpp b/src/core/CL/kernels/CLThresholdKernel.cpp index de81644edd..72c22f043c 100644 --- a/src/core/CL/kernels/CLThresholdKernel.cpp +++ b/src/core/CL/kernels/CLThresholdKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLThresholdKernel.h" +#include "src/core/CL/kernels/CLThresholdKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/src/core/CL/kernels/CLThresholdKernel.h b/src/core/CL/kernels/CLThresholdKernel.h new file mode 100644 index 0000000000..511eaed1bf --- /dev/null +++ b/src/core/CL/kernels/CLThresholdKernel.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
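Because CLTableLookupKernel derives from ICLSimple2DKernel it only exposes configure(); a rough end-to-end sketch is shown below. The runtime pieces (CLScheduler, CLTensor, CLLut and their initialisation/allocation calls) are assumed from the library's public runtime API and are not part of this patch, so treat this as an outline rather than a verified recipe.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLLut.h"       // assumed runtime LUT wrapper
#include "arm_compute/runtime/CL/CLScheduler.h" // assumed runtime scheduler
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLTableLookupKernel.h"

using namespace arm_compute;

void run_table_lookup()
{
    CLScheduler::get().default_init(); // create a default OpenCL context and queue

    CLTensor input, output;
    input.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));
    output.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));

    CLLut lut(256, DataType::U8); // one table entry per possible U8 value (constructor assumed)

    CLTableLookupKernel kernel;
    kernel.configure(&input, &lut, &output);

    input.allocator()->allocate();
    output.allocator()->allocate();
    // ... allocate and fill the LUT and the input image ...
    CLScheduler::get().enqueue(kernel); // run on the default queue
}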
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLTHRESHOLDKERNEL_H +#define ARM_COMPUTE_CLTHRESHOLDKERNEL_H + +#include "arm_compute/core/KernelDescriptors.h" +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ICLTensor; + +/** Interface for the thresholding kernel. */ +class CLThresholdKernel : public ICLSimple2DKernel +{ +public: + /**Initialise the kernel's input, output and threshold parameters. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] info Threshold descriptor + */ + void configure(const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info); + /**Initialise the kernel's input, output and threshold parameters. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] info Threshold descriptor + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info); +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NETHRESHOLDKERNEL_H */ diff --git a/src/core/CL/kernels/CLTileKernel.cpp b/src/core/CL/kernels/CLTileKernel.cpp index 43c8953363..c0c3d2e2ee 100644 --- a/src/core/CL/kernels/CLTileKernel.cpp +++ b/src/core/CL/kernels/CLTileKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLTileKernel.h" +#include "src/core/CL/kernels/CLTileKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/helpers/AutoConfiguration.h" diff --git a/src/core/CL/kernels/CLTileKernel.h b/src/core/CL/kernels/CLTileKernel.h new file mode 100644 index 0000000000..41752ca90b --- /dev/null +++ b/src/core/CL/kernels/CLTileKernel.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
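A corresponding sketch for the thresholding kernel above, reusing the same runtime scaffolding as the table-lookup example. ThresholdKernelInfo is only referenced here by name from arm_compute/core/KernelDescriptors.h; its default-constructibility and its fields (threshold type and values) are assumptions, so consult the descriptor definition before copying this.

#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/core/CL/kernels/CLThresholdKernel.h"

using namespace arm_compute;

// 'input' and 'output' are assumed to be U8 CLTensors that were already initialised and allocated.
void run_threshold(CLTensor &input, CLTensor &output)
{
    const ThresholdKernelInfo info{}; // assumed default-constructible; set type/threshold values as needed

    CLThresholdKernel kernel;
    kernel.configure(&input, &output, info);
    CLScheduler::get().enqueue(kernel);
}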
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLTILEKERNEL_H +#define ARM_COMPUTE_CLTILEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform a Tile operation */ +class CLTileKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLTileKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLTileKernel(const CLTileKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLTileKernel &operator=(const CLTileKernel &) = delete; + /** Allow instances of this class to be moved */ + CLTileKernel(CLTileKernel &&) = default; + /** Allow instances of this class to be moved */ + CLTileKernel &operator=(CLTileKernel &&) = default; + /** Default destructor */ + ~CLTileKernel() = default; + /** Set the source, destination of the kernel + * + * @param[in] input Source tensor. Data type supported: All. + * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. + * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported). + * @param[out] output Destination tensor. Same as @p input + * + */ + void configure(const ICLTensor *input, ICLTensor *output, const Multiples &multiples); + /** Set the source, destination of the kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data type supported: All. + * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. + * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported). + * @param[out] output Destination tensor. Same as @p input + * + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Multiples &multiples); + /** Static function to check if given info will lead to a valid configuration of @ref CLTileKernel + * + * @param[in] input Source tensor info. Data type supported: All. + * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. + * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported). + * @param[in] output Destination tensor info. 
Same as @p input + * + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Multiples &multiples); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLTILEKERNEL_H */ diff --git a/src/core/CL/kernels/CLTransposeKernel.cpp b/src/core/CL/kernels/CLTransposeKernel.cpp index bd910196e9..8d967e901f 100644 --- a/src/core/CL/kernels/CLTransposeKernel.cpp +++ b/src/core/CL/kernels/CLTransposeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLTransposeKernel.h" +#include "src/core/CL/kernels/CLTransposeKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLTransposeKernel.h b/src/core/CL/kernels/CLTransposeKernel.h new file mode 100644 index 0000000000..0c4b7b4aff --- /dev/null +++ b/src/core/CL/kernels/CLTransposeKernel.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLTRANSPOSEKERNEL_H +#define ARM_COMPUTE_CLTRANSPOSEKERNEL_H + +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel which transposes the elements of a matrix. + * + * [width, height, batch] -> [height, width, batch] + * + */ +class CLTransposeKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: Same as @p input + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: Same as @p input + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLTransposeKernel + * + * @param[in] input Input tensor. Data types supported: All. + * @param[in] output Output tensor. 
Data type supported: Same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLTRANSPOSEKERNEL_H */ diff --git a/src/core/CL/kernels/CLUpsampleLayerKernel.cpp b/src/core/CL/kernels/CLUpsampleLayerKernel.cpp index a4fc10f26a..acb2fbcd04 100644 --- a/src/core/CL/kernels/CLUpsampleLayerKernel.cpp +++ b/src/core/CL/kernels/CLUpsampleLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h" +#include "src/core/CL/kernels/CLUpsampleLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLUpsampleLayerKernel.h b/src/core/CL/kernels/CLUpsampleLayerKernel.h new file mode 100644 index 0000000000..f90ee07bf4 --- /dev/null +++ b/src/core/CL/kernels/CLUpsampleLayerKernel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H +#define ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the UpsampleLayer kernel on OpenCL. */ +class CLUpsampleLayerKernel : public ICLKernel +{ +public: + /** Constructor */ + CLUpsampleLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLUpsampleLayerKernel(const CLUpsampleLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLUpsampleLayerKernel &operator=(const CLUpsampleLayerKernel &) = delete; + /** Default Move Constructor. */ + CLUpsampleLayerKernel(CLUpsampleLayerKernel &&) = default; + /** Default move assignment operator */ + CLUpsampleLayerKernel &operator=(CLUpsampleLayerKernel &&) = default; + /** Default destructor */ + ~CLUpsampleLayerKernel() = default; + + /** Initialise the kernel's input and output. + * + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] info Contains stride information described in @ref Size2D. + * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy); + /** Initialise the kernel's input and output. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] info Contains stride information described in @ref Size2D. + * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy); + /** Static function to check if given info will lead to a valid configuration of @ref CLUpsampleLayerKernel + * + * @param[in] input Source tensor info. Data types supported: All. + * @param[in] output Destination tensor info. Data types supported: same as @p input. + * @param[in] info Contains stride information described in @ref Size2D. + * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info, const InterpolationPolicy upsampling_policy); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + Size2D _info; + DataLayout _data_layout; + unsigned int _num_elems_processed_per_iteration_input_x; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLWarpAffineKernel.cpp b/src/core/CL/kernels/CLWarpAffineKernel.cpp index 95a7c1b875..600c67a528 100644 --- a/src/core/CL/kernels/CLWarpAffineKernel.cpp +++ b/src/core/CL/kernels/CLWarpAffineKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h" +#include "src/core/CL/kernels/CLWarpAffineKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLWarpAffineKernel.h b/src/core/CL/kernels/CLWarpAffineKernel.h new file mode 100644 index 0000000000..c600ee780d --- /dev/null +++ b/src/core/CL/kernels/CLWarpAffineKernel.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
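The tile, transpose and upsample kernels above all follow the same validate() pattern; a combined compile-time sketch is given below. The shapes are illustrative, and Multiples is assumed to be a per-dimension list of repeat counts as the parameter documentation describes.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLTileKernel.h"
#include "src/core/CL/kernels/CLTransposeKernel.h"
#include "src/core/CL/kernels/CLUpsampleLayerKernel.h"

using namespace arm_compute;

bool check_tile_transpose_upsample()
{
    const TensorInfo in2d(TensorShape(4U, 6U), 1, DataType::F32);

    // Tile: repeat twice along x, once along y -> width doubles.
    const TensorInfo tiled(TensorShape(8U, 6U), 1, DataType::F32);
    const Status tile_status = CLTileKernel::validate(&in2d, &tiled, Multiples{ 2U, 1U });

    // Transpose: [width, height] -> [height, width].
    const TensorInfo transposed(TensorShape(6U, 4U), 1, DataType::F32);
    const Status transpose_status = CLTransposeKernel::validate(&in2d, &transposed);

    // Upsample a 4D tensor by a 2x2 stride with nearest-neighbour filling.
    const TensorInfo in4d(TensorShape(4U, 6U, 3U, 1U), 1, DataType::F32);
    const TensorInfo upsampled(TensorShape(8U, 12U, 3U, 1U), 1, DataType::F32);
    const Status upsample_status = CLUpsampleLayerKernel::validate(&in4d, &upsampled, Size2D(2U, 2U),
                                                                   InterpolationPolicy::NEAREST_NEIGHBOR);

    return tile_status.error_code() == ErrorCode::OK && transpose_status.error_code() == ErrorCode::OK
           && upsample_status.error_code() == ErrorCode::OK;
}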
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLWARPAFFINEKERNEL_H +#define ARM_COMPUTE_CLWARPAFFINEKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the warp affine kernel.*/ +class CLWarpAffineKernel : public ICLSimple2DKernel +{ +public: + /** Initialize the function's source, destination, interpolation policy and border_mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8. + * @param[in] matrix The perspective matrix. Must be 2x3 of type float + * The matrix argument requires 9 values, the last 3 values are ignored. + * @param[in] policy The interpolation type. + */ + void configure(const ICLTensor *input, ICLTensor *output, const std::array &matrix, InterpolationPolicy policy); + /** Initialize the function's source, destination, interpolation policy and border_mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8. + * @param[in] matrix The perspective matrix. Must be 2x3 of type float + * The matrix argument requires 9 values, the last 3 values are ignored. + * @param[in] policy The interpolation type. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array &matrix, InterpolationPolicy policy); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLWARPAFFINEKERNEL_H */ diff --git a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp b/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp index 2fe1feb485..5f20a0bdd3 100644 --- a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp +++ b/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h" +#include "src/core/CL/kernels/CLWarpPerspectiveKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLWarpPerspectiveKernel.h b/src/core/CL/kernels/CLWarpPerspectiveKernel.h new file mode 100644 index 0000000000..dcbe1c5560 --- /dev/null +++ b/src/core/CL/kernels/CLWarpPerspectiveKernel.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H +#define ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; +/** Interface for the warp perspective kernel.*/ +class CLWarpPerspectiveKernel : public ICLSimple2DKernel +{ +public: + /** Initialize the function's source, destination, interpolation policy and border_mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8. + * @param[in] matrix The perspective matrix. Must be 3x3 of type float. + * @param[in] policy The interpolation type. + */ + void configure(const ICLTensor *input, ICLTensor *output, const std::array &matrix, InterpolationPolicy policy); + /** Initialize the function's source, destination, interpolation policy and border_mode. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8. + * @param[in] matrix The perspective matrix. Must be 3x3 of type float. + * @param[in] policy The interpolation type. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array &matrix, InterpolationPolicy policy); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H */ diff --git a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp index c06c2d3ec7..559f47ce26 100644 --- a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp +++ b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h" +#include "src/core/CL/kernels/CLWeightsReshapeKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" diff --git a/src/core/CL/kernels/CLWeightsReshapeKernel.h b/src/core/CL/kernels/CLWeightsReshapeKernel.h new file mode 100644 index 0000000000..402a60472b --- /dev/null +++ b/src/core/CL/kernels/CLWeightsReshapeKernel.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
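For the two warp kernels above, the matrix parameter is documented as taking 9 float values (with the last 3 ignored in the affine case), so a std::array<float, 9> is assumed in the sketch below; the headers do not restate the element-ordering convention, so the values are placeholders only. The source and destination tensors are assumed to be already-initialised U8 tensors.

#include <array>

#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLWarpAffineKernel.h"
#include "src/core/CL/kernels/CLWarpPerspectiveKernel.h"

using namespace arm_compute;

void configure_warps(ICLTensor *src, ICLTensor *dst_affine, ICLTensor *dst_perspective)
{
    // Nine values as documented; only the 2x3 part (first 6 values) is used by the affine kernel.
    // Check the kernel documentation/tests for the exact row/column ordering before relying on these.
    const std::array<float, 9> matrix{ 1.f, 0.f, 0.f,
                                       0.f, 1.f, 0.f,
                                       0.f, 0.f, 1.f };

    CLWarpAffineKernel affine;
    affine.configure(src, dst_affine, matrix, InterpolationPolicy::NEAREST_NEIGHBOR);

    CLWarpPerspectiveKernel perspective;
    perspective.configure(src, dst_perspective, matrix, InterpolationPolicy::BILINEAR);
}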
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H +#define ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +/** OpenCL kernel to perform reshaping on the weights used by convolution and locally connected layer + * + * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels. + * In combination with the @ref CLIm2ColKernel can transform a convolution to a matrix multiplication. + * + * For example assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have: + * @f[ + * \left( \begin{array}{ccc} + * a000 & a001 & a002 \\ + * a010 & a011 & a012 \\ + * a020 & a021 & a022 \\ + * \end{array} \right) + * \left( \begin{array}{ccc} + * a100 & a101 & a102 \\ + * a110 & a111 & a112 \\ + * a120 & a121 & a122 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccc} + * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\ + * \end{array} \right) + * @f] + */ +class CLWeightsReshapeKernel : public ICLKernel +{ +public: + /** Constructor.*/ + CLWeightsReshapeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWeightsReshapeKernel(const CLWeightsReshapeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWeightsReshapeKernel &operator=(const CLWeightsReshapeKernel &) = delete; + /** Allow instances of this class to be moved */ + CLWeightsReshapeKernel(CLWeightsReshapeKernel &&) = default; + /** Allow instances of this class to be moved */ + CLWeightsReshapeKernel &operator=(CLWeightsReshapeKernel &&) = default; + /** Default destructor */ + ~CLWeightsReshapeKernel() = default; + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, + * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All + * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with + * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr. 
+ * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. + * @param[out] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise. + * Data types supported: Same as @p input + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout + * Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it. + */ + void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1); + /** Set the input and output of the kernel. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, + * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All + * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with + * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr. + * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. + * @param[out] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise. + * Data types supported: Same as @p input + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout + * Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1); + /** Static function to check if given info will lead to a valid configuration of @ref CLWeightsReshapeKernel + * + * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, + * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All + * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with + * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr. + * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. + * @param[in] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise. + * Data types supported: Same as @p input + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout + * Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups = 1); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_biases; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H */ \ No newline at end of file diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp index a7a3463f59..d6697ba46b 100644 --- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" +#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h new file mode 100644 index 0000000000..2af89e12eb --- /dev/null +++ b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H +#define ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +/** Interface for the width concatenate kernel of 2 tensors. + * The input1 and input2 tensors will be concatenated into the output tensor. 
+ */ +class CLWidthConcatenate2TensorsKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLWidthConcatenate2TensorsKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWidthConcatenate2TensorsKernel(const CLWidthConcatenate2TensorsKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWidthConcatenate2TensorsKernel &operator=(const CLWidthConcatenate2TensorsKernel &) = delete; + /** Allow instances of this class to be moved */ + CLWidthConcatenate2TensorsKernel(CLWidthConcatenate2TensorsKernel &&) = default; + /** Allow instances of this class to be moved */ + CLWidthConcatenate2TensorsKernel &operator=(CLWidthConcatenate2TensorsKernel &&) = default; + /** Default destructor */ + ~CLWidthConcatenate2TensorsKernel() = default; + /** Initialise the kernel's input1s and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 First input tensor. Data types supported: All. + * @param[in] input2 Second input tensor. Data types supported: same as @p input1 + * @param[out] output Output tensor. Data types supported: Same as @p input1. + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate2TensorsKernel + * + * @param[in] input1 First tensor info. Data types supported: All. + * @param[in] input2 Second tensor info. Data types supported: same as @p input1 + * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H */ diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp index 1c8fef2db3..7ecdd30224 100644 --- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" +#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h new file mode 100644 index 0000000000..0caf87114d --- /dev/null +++ b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
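Going back to the CLWeightsReshapeKernel header above: its rearrangement can be exercised through validate() without hand-computing the reshaped 2D shape, by deriving that shape from the shape-calculator helper already included by the accompanying .cpp. The compute_weights_reshaped_shape helper name and the shapes below are assumptions made for illustration.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"

using namespace arm_compute;

Status check_weights_reshape()
{
    // 3x3 kernels, 2 input channels, 4 output channels: [kernel_x, kernel_y, IFM, OFM].
    const TensorInfo weights(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32);
    const TensorInfo biases(TensorShape(4U), 1, DataType::F32); // one bias per OFM

    // Each 3x3x2 kernel becomes one linearised row of 18 values, with the bias appended.
    const TensorShape reshaped_shape =
        misc::shape_calculator::compute_weights_reshaped_shape(weights, /* has_bias */ true);
    const TensorInfo output(reshaped_shape, 1, DataType::F32);

    return CLWeightsReshapeKernel::validate(&weights, &biases, &output); // default num_groups = 1
}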
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H +#define ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +/** Interface for the width concatenate kernel of 4 tensors. + * All input tensors will be concatenated into the output tensor. + */ +class CLWidthConcatenate4TensorsKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLWidthConcatenate4TensorsKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWidthConcatenate4TensorsKernel(const CLWidthConcatenate4TensorsKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWidthConcatenate4TensorsKernel &operator=(const CLWidthConcatenate4TensorsKernel &) = delete; + /** Allow instances of this class to be moved */ + CLWidthConcatenate4TensorsKernel(CLWidthConcatenate4TensorsKernel &&) = default; + /** Allow instances of this class to be moved */ + CLWidthConcatenate4TensorsKernel &operator=(CLWidthConcatenate4TensorsKernel &&) = default; + /** Default destructor */ + ~CLWidthConcatenate4TensorsKernel() = default; + /** Initialise the kernel's input1s and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input1 First input tensor. Data types supported: All. + * @param[in] input2 Second input tensor. Data types supported: same as @p input1 + * @param[in] input3 Third input tensor. Data types supported: same as @p input1 + * @param[in] input4 Fourth input tensor. Data types supported: same as @p input1 + * @param[out] output Output tensor. Data types supported: Same as @p input1. + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *input3, ITensorInfo *input4, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate4TensorsKernel + * + * @param[in] input1 First tensor info. Data types supported: All. + * @param[in] input2 Second tensor info. Data types supported: same as @p input1 + * @param[in] input3 Third tensor info. Data types supported: same as @p input1 + * @param[in] input4 Fourth tensor info. Data types supported: same as @p input1 + * @param[in] output Output tensor info. 
Data types supported: Same as @p input1. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H */ diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp index a9a601dc8e..30d0a481bd 100644 --- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h" +#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.h b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.h new file mode 100644 index 0000000000..09c3f4455d --- /dev/null +++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H +#define ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +/** Interface for the width concatenate kernel. + * The input tensor will be concatenated into the output tensor. 
+ */ +class CLWidthConcatenateLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLWidthConcatenateLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWidthConcatenateLayerKernel(const CLWidthConcatenateLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWidthConcatenateLayerKernel &operator=(const CLWidthConcatenateLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLWidthConcatenateLayerKernel(CLWidthConcatenateLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLWidthConcatenateLayerKernel &operator=(CLWidthConcatenateLayerKernel &&) = default; + /** Default destructor */ + ~CLWidthConcatenateLayerKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Input tensor. Data types supported: All. + * @param[in] width_offset The offset on the X axis. + * @param[in,out] output Output tensor. Data types supported: Same as @p input. + * + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int width_offset, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenateLayerKernel + * + * @param[in] input Input tensor info. Data types supported: All. + * @param[in] width_offset The offset on the X axis. + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp index e2f9ca5726..bd45ddb65f 100644 --- a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp +++ b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h" +#include "src/core/CL/kernels/CLWinogradFilterTransformKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" @@ -83,7 +83,7 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen const unsigned int num_elems_processed_per_iteration_y = input->dimension(1); const unsigned int num_elems_read_per_iteration_z = input->data_layout() == DataLayout::NCHW ? 
1 : input->dimension(2); - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y, num_elems_read_per_iteration_z)); + Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y, num_elems_read_per_iteration_z)); Window win_collapsed = win.collapse(win, Window::DimZ); return std::make_pair(Status{}, win_collapsed); } diff --git a/src/core/CL/kernels/CLWinogradFilterTransformKernel.h b/src/core/CL/kernels/CLWinogradFilterTransformKernel.h new file mode 100644 index 0000000000..d22fedebcd --- /dev/null +++ b/src/core/CL/kernels/CLWinogradFilterTransformKernel.h @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H +#define ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the Winograd filter transform kernel. */ +class CLWinogradFilterTransformKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLWinogradFilterTransformKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWinogradFilterTransformKernel(const CLWinogradFilterTransformKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWinogradFilterTransformKernel &operator=(const CLWinogradFilterTransformKernel &) = delete; + /** Allow instances of this class to be moved */ + CLWinogradFilterTransformKernel(CLWinogradFilterTransformKernel &&) = default; + /** Allow instances of this class to be moved */ + CLWinogradFilterTransformKernel &operator=(CLWinogradFilterTransformKernel &&) = default; + /** Default destructor */ + ~CLWinogradFilterTransformKernel() = default; + /** Set the input and output tensor. 
+ * + * @note Winograd filter transform supports the following configurations for NCWH data layout + * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), + * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * @note Winograd filter transform supports the following configurations for NHWC data layout + * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * Strides: only unit strides + * + * @param[in] input Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32. + * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input + * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo + */ + void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info); + /** Set the input and output tensor. + * + * @note Winograd filter transform supports the following configurations for NCWH data layout + * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), + * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * @note Winograd filter transform supports the following configurations for NHWC data layout + * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * Strides: only unit strides + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32. + * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input + * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradFilterTransformKernel + * + * @note Winograd filter transform supports the following configurations for NCWH data layout + * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), + * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * @note Winograd filter transform supports the following configurations for NHWC data layout + * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * Strides: only unit strides + * + * @param[in] input Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32. + * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. 
Data types supported: Same as @p input + * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H */ diff --git a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp index 15c239e849..6f695c93db 100644 --- a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp +++ b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h" +#include "src/core/CL/kernels/CLWinogradInputTransformKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLWinogradInputTransformKernel.h b/src/core/CL/kernels/CLWinogradInputTransformKernel.h new file mode 100644 index 0000000000..25301877e6 --- /dev/null +++ b/src/core/CL/kernels/CLWinogradInputTransformKernel.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
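// A hedged sketch of validating the filter transform declared above for an F(4x4, 3x3) NCHW case.
// The 56x56 input size, the 64/128 channel counts and the exact helper signature are assumptions of
// this illustration; the WinogradInfo type and the compute_winograd_filter_transform_shape() utility
// referenced in the doxygen come from the public arm_compute headers, not from this patch.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/CL/kernels/CLWinogradFilterTransformKernel.h"

using namespace arm_compute;

Status winograd_filter_transform_validate_sketch()
{
    // Winograd configuration: 4x4 output tile, 3x3 kernel, unit strides, NCHW data layout.
    const WinogradInfo winograd_info(Size2D(4U, 4U), Size2D(3U, 3U),
                                     Size2D(56U, 56U) /* convolution input WxH */,
                                     PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW);

    // NCHW weights are laid out as [kernel_x, kernel_y, IFM, OFM].
    const TensorInfo weights(TensorShape(3U, 3U, 64U, 128U), 1, DataType::F32);

    // Destination shape obtained with the utility mentioned in the header documentation.
    const TensorShape dst_shape = misc::shape_calculator::compute_winograd_filter_transform_shape(weights, winograd_info);
    const TensorInfo  dst(dst_shape, 1, DataType::F32);

    return CLWinogradFilterTransformKernel::validate(&weights, &dst, winograd_info);
}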
+ */ +#ifndef ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H +#define ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform Winograd input transform.*/ +class CLWinogradInputTransformKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLWinogradInputTransformKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWinogradInputTransformKernel(const CLWinogradInputTransformKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWinogradInputTransformKernel &operator=(const CLWinogradInputTransformKernel &) = delete; + /** Allow instances of this class to be moved */ + CLWinogradInputTransformKernel(CLWinogradInputTransformKernel &&) = default; + /** Allow instances of this class to be moved */ + CLWinogradInputTransformKernel &operator=(CLWinogradInputTransformKernel &&) = default; + /** Set the input and output of the kernel. + * + * @note Winograd input transform supports the following configurations for NCWH data layout + * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), + * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * @note Winograd input transform supports the following configurations for NHWC data layout + * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * Strides: only unit strides + * + * @param[in] input The input tensor to transform. Data types supported: F16/F32 + * @param[in] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input + * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo. + */ + void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info); + /** Set the input and output of the kernel. + * + * @note Winograd input transform supports the following configurations for NCWH data layout + * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), + * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * @note Winograd input transform supports the following configurations for NHWC data layout + * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * Strides: only unit strides + * + * @param[in] compile_context The compile context to be used. + * @param[in] input The input tensor to transform. Data types supported: F16/F32 + * @param[in] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input + * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo. 
+ */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradInputTransformKernel + * + * @note Winograd input transform supports the following configurations for NCWH data layout + * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), + * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * @note Winograd input transform supports the following configurations for NHWC data layout + * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * Strides: only unit strides + * + * @param[in] input The input tensor to transform. Data types supported: F16/F32 + * @param[in] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input + * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + using WinogradKey = std::pair, std::pair>; + + BorderSize _border_size; + const ICLTensor *_input; + ICLTensor *_output; + DataLayout _data_layout; + int _num_tiles_x; + int _num_tiles_y; + unsigned int _step_z; +}; +} // arm_compute +#endif /*ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H */ diff --git a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp index 89a5176756..2018559f60 100644 --- a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp +++ b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h" +#include "src/core/CL/kernels/CLWinogradOutputTransformKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" diff --git a/src/core/CL/kernels/CLWinogradOutputTransformKernel.h b/src/core/CL/kernels/CLWinogradOutputTransformKernel.h new file mode 100644 index 0000000000..632a5629d9 --- /dev/null +++ b/src/core/CL/kernels/CLWinogradOutputTransformKernel.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H +#define ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the Winograd output transform kernel. */ +class CLWinogradOutputTransformKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLWinogradOutputTransformKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWinogradOutputTransformKernel(const CLWinogradOutputTransformKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWinogradOutputTransformKernel &operator=(const CLWinogradOutputTransformKernel &) = delete; + /** Allow instances of this class to be moved */ + CLWinogradOutputTransformKernel(CLWinogradOutputTransformKernel &&) = default; + /** Allow instances of this class to be moved */ + CLWinogradOutputTransformKernel &operator=(CLWinogradOutputTransformKernel &&) = default; + /** Default destructor */ + ~CLWinogradOutputTransformKernel() = default; + /** Set the input and output tensor. + * + * @note Winograd output transform supports the following configurations for NCWH data layout + * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), + * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * @note Winograd output transform supports the following configurations for NHWC data layout + * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * Strides: only unit strides + * + * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32. + * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input + * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input + * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Set the input and output tensor. + * + * @note Winograd output transform supports the following configurations for NCWH data layout + * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), + * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * @note Winograd output transform supports the following configurations for NHWC data layout + * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * Strides: only unit strides + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32. + * @param[in] bias Biases tensor. Shared biases supported. 
Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input + * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input + * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradOutputTransformKernel + * + * @note Winograd output transform supports the following configurations for NCWH data layout + * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), + * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * @note Winograd output transform supports the following configurations for NHWC data layout + * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), + * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) + * + * Strides: only unit strides + * + * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32. + * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input + * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input + * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo + * @param[in] act_info (Optional) Activation layer information in case of a fused activation @ref ActivationLayerInfo. Only RELU, BOUNDED_RELU, LU_BOUNDED_RELU, LEAKY_RELU and SOFT_RELU supported. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + using WinogradKey = std::pair, std::pair>; + + const ICLTensor *_input; + const ICLTensor *_bias; + ICLTensor *_output; + bool _is_nhwc; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H */ diff --git a/src/core/CL/kernels/CLYOLOLayerKernel.cpp b/src/core/CL/kernels/CLYOLOLayerKernel.cpp index 0c7588d740..e12d1e7a65 100644 --- a/src/core/CL/kernels/CLYOLOLayerKernel.cpp +++ b/src/core/CL/kernels/CLYOLOLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLYOLOLayerKernel.h" +#include "src/core/CL/kernels/CLYOLOLayerKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLHelpers.h" diff --git a/src/core/CL/kernels/CLYOLOLayerKernel.h b/src/core/CL/kernels/CLYOLOLayerKernel.h new file mode 100644 index 0000000000..5b1d56e9e5 --- /dev/null +++ b/src/core/CL/kernels/CLYOLOLayerKernel.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLYOLOLAYERKERNEL_H +#define ARM_COMPUTE_CLYOLOLAYERKERNEL_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the YOLO layer kernel that performs partial activation. + * For each box, activate only: + * - x and y position (channel 0 and 1 of each box) + * - objectiveness (channel 4 of each box) + * - classes (channel 5 to (classes - 5) of each box) + */ +class CLYOLOLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLYOLOLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLYOLOLayerKernel(const CLYOLOLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLYOLOLayerKernel &operator=(const CLYOLOLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLYOLOLayerKernel(CLYOLOLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLYOLOLayerKernel &operator=(CLYOLOLayerKernel &&) = default; + /** Default destructor */ + ~CLYOLOLayerKernel() = default; + /** Set the input and output tensor. + * + * @note If the output tensor is a nullptr, the activation function will be performed in-place + * + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] act_info Activation layer information. + * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) + */ + void configure(ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes); + /** Set the input and output tensor. + * + * @note If the output tensor is a nullptr, the activation function will be performed in-place + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] act_info Activation layer information. 
+ * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes); + /** Static function to check if given info will lead to a valid configuration of @ref CLYOLOLayerKernel + * + * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result + * of the activation function. Data types supported: F16/F32. + * @param[in] output Destination tensor info. Data type supported: same as @p input + * @param[in] act_info Activation layer information. + * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_input; + ICLTensor *_output; + bool _run_in_place; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLYOLOLAYERKERNEL_H */ diff --git a/src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h b/src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h new file mode 100644 index 0000000000..4c92ae417f --- /dev/null +++ b/src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H +#define ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H + +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor. 
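// A small standalone sketch of the channel selection described in the YOLO kernel above: for each box
// the kernel activates x/y (channels 0 and 1), objectiveness (channel 4) and the class scores, while
// leaving the remaining box channels untouched. The box layout of 5 + num_classes channels per box is
// the usual YOLOv3 convention and is an assumption of this illustration, not taken from the patch.
#include <cstdint>

// Returns true if the given channel inside a box would be passed through the activation function.
inline bool yolo_channel_is_activated(int32_t channel_in_box, int32_t num_classes)
{
    const bool is_xy    = (channel_in_box == 0) || (channel_in_box == 1);
    const bool is_obj   = (channel_in_box == 4);
    const bool is_class = (channel_in_box >= 5) && (channel_in_box < 5 + num_classes);
    return is_xy || is_obj || is_class;
}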
+ */ +class ICLDepthwiseConvolutionLayer3x3Kernel : public ICLKernel +{ +public: + /** Default constructor */ + ICLDepthwiseConvolutionLayer3x3Kernel() + : _border_size(0), _input(), _output(), _weights(), _biases(), _conv_stride_y(1), _output_multipliers(), _output_shifts(), _is_quantized(false) + { + } + /** Prevent instances of this class from being copied (As this class contains pointers) */ + ICLDepthwiseConvolutionLayer3x3Kernel(const ICLDepthwiseConvolutionLayer3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + ICLDepthwiseConvolutionLayer3x3Kernel &operator=(const ICLDepthwiseConvolutionLayer3x3Kernel &) = delete; + /** Default Move Constructor. */ + ICLDepthwiseConvolutionLayer3x3Kernel(ICLDepthwiseConvolutionLayer3x3Kernel &&) = default; + /** Default move assignment operator */ + ICLDepthwiseConvolutionLayer3x3Kernel &operator=(ICLDepthwiseConvolutionLayer3x3Kernel &&) = default; + /** Initialize the function's source, destination, conv and border_size. + * + * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. + * Data type supported: Same as @p input, QASYMM8/QSYMM8_PER_CHANNEL when input is QASYMM8. + * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for QASYMM8. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + */ + virtual void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), + const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) = 0; + /** Initialize the function's source, destination, conv and border_size. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. + * Data type supported: Same as @p input, QASYMM8/QSYMM8_PER_CHANNEL when input is QASYMM8. + * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8. + * @param[out] output Destination tensor. Data type supported: Same as @p input. 
+ * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for QASYMM8. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, + * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 + */ + virtual void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), + const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) = 0; + +protected: + BorderSize _border_size; + const ICLTensor *_input; + ICLTensor *_output; + const ICLTensor *_weights; + const ICLTensor *_biases; + unsigned int _conv_stride_y; + const ICLTensor *_output_multipliers; + const ICLTensor *_output_shifts; + bool _is_quantized; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H */ diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp index cd732553be..98013b9e49 100644 --- a/src/graph/backends/CL/CLFunctionsFactory.cpp +++ b/src/graph/backends/CL/CLFunctionsFactory.cpp @@ -28,6 +28,7 @@ #include "arm_compute/graph/backends/FunctionHelpers.h" #include "arm_compute/runtime/CL/CLFunctions.h" #include "arm_compute/runtime/CPP/CPPFunctions.h" +#include "src/core/CL/CLKernels.h" #include "support/Cast.h" using namespace arm_compute::utils::cast; diff --git a/src/graph/backends/CL/CLNodeValidator.cpp b/src/graph/backends/CL/CLNodeValidator.cpp index 8c1fedd93f..830f54ce3f 100644 --- a/src/graph/backends/CL/CLNodeValidator.cpp +++ b/src/graph/backends/CL/CLNodeValidator.cpp @@ -28,6 +28,17 @@ #include "arm_compute/runtime/CL/CLFunctions.h" #include "arm_compute/runtime/CPP/CPPFunctions.h" +#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" +#include "src/core/CL/kernels/CLIm2ColKernel.h" +#include "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h" +#include "src/core/CL/kernels/CLWeightsReshapeKernel.h" #include "support/Cast.h" using namespace arm_compute::utils::cast; diff --git a/src/runtime/CL/CLOperator.cpp b/src/runtime/CL/CLOperator.cpp index 57a4d0ec57..075a544077 100644 --- a/src/runtime/CL/CLOperator.cpp +++ 
b/src/runtime/CL/CLOperator.cpp @@ -24,6 +24,8 @@ #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/ICLOperator.h" +#include "src/core/CL/ICLKernel.h" + namespace arm_compute { namespace experimental diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp index ccef5cbd1b..6fc7baed63 100644 --- a/src/runtime/CL/CLScheduler.cpp +++ b/src/runtime/CL/CLScheduler.cpp @@ -24,10 +24,10 @@ #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/runtime/CL/CLHelpers.h" #include "arm_compute/runtime/CL/CLTuner.h" #include "arm_compute/runtime/CL/tuners/Tuners.h" +#include "src/core/CL/ICLKernel.h" namespace arm_compute { diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp index adfe67fb11..ed85e606cf 100644 --- a/src/runtime/CL/CLTuner.cpp +++ b/src/runtime/CL/CLTuner.cpp @@ -24,9 +24,9 @@ #include "arm_compute/runtime/CL/CLTuner.h" #include "arm_compute/runtime/CL/tuners/CLLWSList.h" -#include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/ICLKernel.h" #include "support/StringSupport.h" #include diff --git a/src/runtime/CL/ICLSimpleFunction.cpp b/src/runtime/CL/ICLSimpleFunction.cpp index b00ad5e74f..b075aa17e3 100644 --- a/src/runtime/CL/ICLSimpleFunction.cpp +++ b/src/runtime/CL/ICLSimpleFunction.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,19 +26,24 @@ #include "arm_compute/core/Error.h" #include "arm_compute/runtime/CL/CLHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/ICLKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "support/MemorySupport.h" using namespace arm_compute; ICLSimpleFunction::ICLSimpleFunction(CLRuntimeContext *ctx) // NOLINT : _kernel(), - _border_handler(), + _border_handler(support::cpp14::make_unique()), _ctx(ctx) { } +ICLSimpleFunction::~ICLSimpleFunction() = default; + void ICLSimpleFunction::run() { ARM_COMPUTE_ERROR_ON_MSG(!_kernel, "The child class didn't set the CL kernel or function isn't configured"); - schedule_kernel_on_ctx(_ctx, &_border_handler, false); + schedule_kernel_on_ctx(_ctx, _border_handler.get(), false); schedule_kernel_on_ctx(_ctx, _kernel.get()); } diff --git a/src/runtime/CL/functions/CLAbsoluteDifference.cpp b/src/runtime/CL/functions/CLAbsoluteDifference.cpp index d5d1bbdd7a..b7f40a516c 100644 --- a/src/runtime/CL/functions/CLAbsoluteDifference.cpp +++ b/src/runtime/CL/functions/CLAbsoluteDifference.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLAbsoluteDifference.h" -#include "arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h" +#include "src/core/CL/kernels/CLAbsoluteDifferenceKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLAccumulate.cpp b/src/runtime/CL/functions/CLAccumulate.cpp index 2f06252446..742de64e34 100644 --- a/src/runtime/CL/functions/CLAccumulate.cpp +++ b/src/runtime/CL/functions/CLAccumulate.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLAccumulate.h" -#include "arm_compute/core/CL/kernels/CLAccumulateKernel.h" +#include "src/core/CL/kernels/CLAccumulateKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLActivationLayer.cpp 
b/src/runtime/CL/functions/CLActivationLayer.cpp index 5ddf227382..61c82b33eb 100644 --- a/src/runtime/CL/functions/CLActivationLayer.cpp +++ b/src/runtime/CL/functions/CLActivationLayer.cpp @@ -24,9 +24,9 @@ #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLRuntimeContext.h" +#include "src/core/CL/kernels/CLActivationLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/CL/functions/CLArgMinMaxLayer.cpp b/src/runtime/CL/functions/CLArgMinMaxLayer.cpp index 57c4f685f6..5fc849e3c5 100644 --- a/src/runtime/CL/functions/CLArgMinMaxLayer.cpp +++ b/src/runtime/CL/functions/CLArgMinMaxLayer.cpp @@ -30,8 +30,10 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/CL/CLValidate.h" +#include "src/core/CL/kernels/CLArgMinMaxLayerKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/runtime/Utils.h" +#include "support/MemorySupport.h" namespace arm_compute { @@ -40,6 +42,8 @@ CLArgMinMaxLayer::CLArgMinMaxLayer(std::shared_ptr memory_manage { } +CLArgMinMaxLayer::~CLArgMinMaxLayer() = default; + Status CLArgMinMaxLayer::validate(const ITensorInfo *input, int axis, const ITensorInfo *output, const ReductionOperation &op) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -124,13 +128,19 @@ void CLArgMinMaxLayer::configure(const CLCompileContext &compile_context, const auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape).set_data_type(output_data_type).reset_padding().set_is_resizable(true)); // Configure reduction operation kernels - _reduction_kernels_vector.resize(_num_of_stages); + _reduction_kernels_vector.reserve(_num_of_stages); + + auto add_reduction_kernel = [this, &compile_context, axis, op](const ICLTensor * input, const ICLTensor * prev_output, ICLTensor * output) + { + _reduction_kernels_vector.emplace_back(support::cpp14::make_unique()); + _reduction_kernels_vector.back()->configure(compile_context, input, prev_output, output, axis, op); + }; _memory_group.manage(&_not_reshaped_output); // Create temporary tensors if(_num_of_stages == 1) { - _reduction_kernels_vector[0].configure(compile_context, input, nullptr, &_not_reshaped_output, axis, op); + add_reduction_kernel(input, nullptr, &_not_reshaped_output); } else { @@ -144,19 +154,19 @@ void CLArgMinMaxLayer::configure(const CLCompileContext &compile_context, const // Apply ReductionOperation only on first kernel _memory_group.manage(&_results_vector[0]); - _reduction_kernels_vector[0].configure(compile_context, input, nullptr, &_results_vector[0], axis, op); + add_reduction_kernel(input, nullptr, &_results_vector[0]); // Apply ReductionOperation on intermediate stages for(unsigned int i = 1; i < _num_of_stages - 1; ++i) { _memory_group.manage(&_results_vector[i]); - _reduction_kernels_vector[i].configure(compile_context, input, &_results_vector[i - 1], &_results_vector[i], axis, op); + add_reduction_kernel(input, &_results_vector[i - 1], &_results_vector[i]); _results_vector[i - 1].allocator()->allocate(); } // Apply ReductionOperation on the last stage const unsigned int last_stage = _num_of_stages - 1; - _reduction_kernels_vector[last_stage].configure(compile_context, input, &_results_vector[last_stage - 1], &_not_reshaped_output, axis, op); + add_reduction_kernel(input, 
&_results_vector[last_stage - 1], &_not_reshaped_output); _results_vector[last_stage - 1].allocator()->allocate(); } _reshape.configure(compile_context, &_not_reshaped_output, output); @@ -169,7 +179,7 @@ void CLArgMinMaxLayer::run() for(unsigned int i = 0; i < _num_of_stages; ++i) { - CLScheduler::get().enqueue(_reduction_kernels_vector[i], false); + CLScheduler::get().enqueue(*_reduction_kernels_vector[i], false); } _reshape.run(); } diff --git a/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp b/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp index 701add074e..77eed1140f 100644 --- a/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp +++ b/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp @@ -29,14 +29,19 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +#include "src/core/CL/kernels/CLBatchNormalizationLayerKernel.h" +namespace arm_compute +{ CLBatchNormalizationLayer::CLBatchNormalizationLayer() - : _norm_kernel() + : _norm_kernel(support::cpp14::make_unique()) { } +CLBatchNormalizationLayer::~CLBatchNormalizationLayer() = default; + void CLBatchNormalizationLayer::configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon, ActivationLayerInfo act_info) { @@ -47,7 +52,7 @@ void CLBatchNormalizationLayer::configure(const CLCompileContext &compile_contex const ICLTensor *gamma, float epsilon, ActivationLayerInfo act_info) { - _norm_kernel.configure(compile_context, input, output, mean, var, beta, gamma, epsilon, act_info); + _norm_kernel->configure(compile_context, input, output, mean, var, beta, gamma, epsilon, act_info); } Status CLBatchNormalizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, @@ -60,5 +65,6 @@ Status CLBatchNormalizationLayer::validate(const ITensorInfo *input, const ITens void CLBatchNormalizationLayer::run() { - CLScheduler::get().enqueue(_norm_kernel, true); + CLScheduler::get().enqueue(*_norm_kernel, true); } +} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/CL/functions/CLBatchToSpaceLayer.cpp b/src/runtime/CL/functions/CLBatchToSpaceLayer.cpp index 5ba3b5bc9c..e0a2c430ed 100644 --- a/src/runtime/CL/functions/CLBatchToSpaceLayer.cpp +++ b/src/runtime/CL/functions/CLBatchToSpaceLayer.cpp @@ -30,13 +30,18 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" -using namespace arm_compute; +#include "src/core/CL/kernels/CLBatchToSpaceLayerKernel.h" +#include "support/MemorySupport.h" +namespace arm_compute +{ CLBatchToSpaceLayer::CLBatchToSpaceLayer() - : _batch_to_space_kernel() + : _batch_to_space_kernel(support::cpp14::make_unique()) { } +CLBatchToSpaceLayer::~CLBatchToSpaceLayer() = default; + void CLBatchToSpaceLayer::configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output) { configure(CLKernelLibrary::get().get_compile_context(), input, block_shape, output); @@ -44,7 +49,7 @@ void CLBatchToSpaceLayer::configure(const ICLTensor *input, const ICLTensor *blo void CLBatchToSpaceLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output) { - _batch_to_space_kernel.configure(compile_context, input, block_shape, output); + _batch_to_space_kernel->configure(compile_context, input, block_shape, output); } void 
CLBatchToSpaceLayer::configure(const ICLTensor *input, int32_t block_shape_x, int32_t block_shape_y, ICLTensor *output) @@ -54,7 +59,7 @@ void CLBatchToSpaceLayer::configure(const ICLTensor *input, int32_t block_shape_ void CLBatchToSpaceLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, int32_t block_shape_x, int32_t block_shape_y, ICLTensor *output) { - _batch_to_space_kernel.configure(compile_context, input, block_shape_x, block_shape_y, output); + _batch_to_space_kernel->configure(compile_context, input, block_shape_x, block_shape_y, output); } Status CLBatchToSpaceLayer::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output) @@ -69,5 +74,6 @@ Status CLBatchToSpaceLayer::validate(const ITensorInfo *input, int32_t block_sha void CLBatchToSpaceLayer::run() { - CLScheduler::get().enqueue(_batch_to_space_kernel, true); + CLScheduler::get().enqueue(*_batch_to_space_kernel, true); } +} // namespace arm_compute diff --git a/src/runtime/CL/functions/CLBitwiseAnd.cpp b/src/runtime/CL/functions/CLBitwiseAnd.cpp index cb49e61e84..cfcd63f170 100644 --- a/src/runtime/CL/functions/CLBitwiseAnd.cpp +++ b/src/runtime/CL/functions/CLBitwiseAnd.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLBitwiseAnd.h" -#include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h" +#include "src/core/CL/kernels/CLBitwiseAndKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLBitwiseNot.cpp b/src/runtime/CL/functions/CLBitwiseNot.cpp index 22c575ca8d..588c793f6a 100644 --- a/src/runtime/CL/functions/CLBitwiseNot.cpp +++ b/src/runtime/CL/functions/CLBitwiseNot.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLBitwiseNot.h" -#include "arm_compute/core/CL/kernels/CLBitwiseNotKernel.h" +#include "src/core/CL/kernels/CLBitwiseNotKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLBitwiseOr.cpp b/src/runtime/CL/functions/CLBitwiseOr.cpp index 4bbb8909fe..3a5de193a3 100644 --- a/src/runtime/CL/functions/CLBitwiseOr.cpp +++ b/src/runtime/CL/functions/CLBitwiseOr.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLBitwiseOr.h" -#include "arm_compute/core/CL/kernels/CLBitwiseOrKernel.h" +#include "src/core/CL/kernels/CLBitwiseOrKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLBitwiseXor.cpp b/src/runtime/CL/functions/CLBitwiseXor.cpp index bc37f6eaab..62aeaaa31f 100644 --- a/src/runtime/CL/functions/CLBitwiseXor.cpp +++ b/src/runtime/CL/functions/CLBitwiseXor.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLBitwiseXor.h" -#include "arm_compute/core/CL/kernels/CLBitwiseXorKernel.h" +#include "src/core/CL/kernels/CLBitwiseXorKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLBoundingBoxTransform.cpp b/src/runtime/CL/functions/CLBoundingBoxTransform.cpp index 2384fc4132..600d36290c 100644 --- a/src/runtime/CL/functions/CLBoundingBoxTransform.cpp +++ b/src/runtime/CL/functions/CLBoundingBoxTransform.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h" -#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h" +#include "src/core/CL/kernels/CLBoundingBoxTransformKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/CL/functions/CLBox3x3.cpp b/src/runtime/CL/functions/CLBox3x3.cpp index 0300899b59..be40f25055 100644 --- 
a/src/runtime/CL/functions/CLBox3x3.cpp +++ b/src/runtime/CL/functions/CLBox3x3.cpp @@ -23,8 +23,9 @@ */ #include "arm_compute/runtime/CL/functions/CLBox3x3.h" -#include "arm_compute/core/CL/kernels/CLBox3x3Kernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/CL/kernels/CLBox3x3Kernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" #include "support/MemorySupport.h" #include @@ -41,5 +42,5 @@ void CLBox3x3::configure(const CLCompileContext &compile_context, ICLTensor *inp auto k = arm_compute::support::cpp14::make_unique(); k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value)); + _border_handler->configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value)); } diff --git a/src/runtime/CL/functions/CLCannyEdge.cpp b/src/runtime/CL/functions/CLCannyEdge.cpp index cd2d6b478a..5a32564d2d 100644 --- a/src/runtime/CL/functions/CLCannyEdge.cpp +++ b/src/runtime/CL/functions/CLCannyEdge.cpp @@ -31,6 +31,10 @@ #include "arm_compute/runtime/CL/functions/CLSobel3x3.h" #include "arm_compute/runtime/CL/functions/CLSobel5x5.h" #include "arm_compute/runtime/CL/functions/CLSobel7x7.h" +#include "src/core/CL/kernels/CLCannyEdgeKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLSobel5x5Kernel.h" +#include "src/core/CL/kernels/CLSobel7x7Kernel.h" #include "support/MemorySupport.h" using namespace arm_compute; @@ -38,10 +42,10 @@ using namespace arm_compute; CLCannyEdge::CLCannyEdge(std::shared_ptr memory_manager) // NOLINT : _memory_group(std::move(memory_manager)), _sobel(), - _gradient(), - _border_mag_gradient(), - _non_max_suppr(), - _edge_trace(), + _gradient(support::cpp14::make_unique()), + _border_mag_gradient(support::cpp14::make_unique()), + _non_max_suppr(support::cpp14::make_unique()), + _edge_trace(support::cpp14::make_unique()), _gx(), _gy(), _mag(), @@ -55,6 +59,8 @@ CLCannyEdge::CLCannyEdge(std::shared_ptr memory_manager) // NOLI { } +CLCannyEdge::~CLCannyEdge() = default; + void CLCannyEdge::configure(ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, uint8_t constant_border_value) { @@ -143,7 +149,7 @@ void CLCannyEdge::configure(const CLCompileContext &compile_context, ICLTensor * _memory_group.manage(&_phase); // Configure gradient - _gradient.configure(compile_context, &_gx, &_gy, &_mag, &_phase, norm_type); + _gradient->configure(compile_context, &_gx, &_gy, &_mag, &_phase, norm_type); // Allocate intermediate buffers _gx.allocator()->allocate(); @@ -153,14 +159,14 @@ void CLCannyEdge::configure(const CLCompileContext &compile_context, ICLTensor * _memory_group.manage(&_nonmax); // Configure non-maxima suppression - _non_max_suppr.configure(compile_context, &_mag, &_phase, &_nonmax, lower_thr, border_mode == BorderMode::UNDEFINED); + _non_max_suppr->configure(compile_context, &_mag, &_phase, &_nonmax, lower_thr, border_mode == BorderMode::UNDEFINED); // Allocate intermediate buffers _phase.allocator()->allocate(); // Fill border around magnitude image as non-maxima suppression will access // it. If border mode is undefined filling the border is a nop. 
- _border_mag_gradient.configure(compile_context, &_mag, _non_max_suppr.border_size(), border_mode, constant_border_value); + _border_mag_gradient->configure(compile_context, &_mag, _non_max_suppr->border_size(), border_mode, constant_border_value); // Allocate intermediate buffers _mag.allocator()->allocate(); @@ -172,7 +178,7 @@ void CLCannyEdge::configure(const CLCompileContext &compile_context, ICLTensor * _memory_group.manage(&_l1_list_counter); // Configure edge tracing - _edge_trace.configure(compile_context, &_nonmax, output, upper_thr, lower_thr, &_visited, &_recorded, &_l1_stack, &_l1_list_counter); + _edge_trace->configure(compile_context, &_nonmax, output, upper_thr, lower_thr, &_visited, &_recorded, &_l1_stack, &_l1_list_counter); // Allocate intermediate buffers _visited.allocator()->allocate(); @@ -190,14 +196,14 @@ void CLCannyEdge::run() _sobel->run(); // Run phase and magnitude calculation - CLScheduler::get().enqueue(_gradient, false); + CLScheduler::get().enqueue(*_gradient, false); // Fill border before non-maxima suppression. Nop for border mode undefined. - CLScheduler::get().enqueue(_border_mag_gradient, false); + CLScheduler::get().enqueue(*_border_mag_gradient, false); // Run non max suppresion _nonmax.clear(CLScheduler::get().queue()); - CLScheduler::get().enqueue(_non_max_suppr, false); + CLScheduler::get().enqueue(*_non_max_suppr, false); // Clear temporary structures and run edge trace _output->clear(CLScheduler::get().queue()); @@ -205,5 +211,5 @@ void CLCannyEdge::run() _recorded.clear(CLScheduler::get().queue()); _l1_list_counter.clear(CLScheduler::get().queue()); _l1_stack.clear(CLScheduler::get().queue()); - CLScheduler::get().enqueue(_edge_trace, true); + CLScheduler::get().enqueue(*_edge_trace, true); } diff --git a/src/runtime/CL/functions/CLCast.cpp b/src/runtime/CL/functions/CLCast.cpp index 95cc0e9239..2a28e06845 100644 --- a/src/runtime/CL/functions/CLCast.cpp +++ b/src/runtime/CL/functions/CLCast.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLCast.h" -#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h" +#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLChannelCombine.cpp b/src/runtime/CL/functions/CLChannelCombine.cpp index 326caa8c74..e93aea31f4 100644 --- a/src/runtime/CL/functions/CLChannelCombine.cpp +++ b/src/runtime/CL/functions/CLChannelCombine.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLChannelCombine.h" -#include "arm_compute/core/CL/kernels/CLChannelCombineKernel.h" +#include "src/core/CL/kernels/CLChannelCombineKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLChannelExtract.cpp b/src/runtime/CL/functions/CLChannelExtract.cpp index aa37af9988..8b4a3f7458 100644 --- a/src/runtime/CL/functions/CLChannelExtract.cpp +++ b/src/runtime/CL/functions/CLChannelExtract.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLChannelExtract.h" -#include "arm_compute/core/CL/kernels/CLChannelExtractKernel.h" +#include "src/core/CL/kernels/CLChannelExtractKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLChannelShuffleLayer.cpp b/src/runtime/CL/functions/CLChannelShuffleLayer.cpp index b79afdb3b4..c443df3b37 100644 --- a/src/runtime/CL/functions/CLChannelShuffleLayer.cpp +++ b/src/runtime/CL/functions/CLChannelShuffleLayer.cpp @@ -23,8 +23,8 @@ */ #include 
"arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h" -#include "arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h" #include "arm_compute/core/Types.h" +#include "src/core/CL/kernels/CLChannelShuffleLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/CL/functions/CLColorConvert.cpp b/src/runtime/CL/functions/CLColorConvert.cpp index 2bbb30e24c..95f4257929 100644 --- a/src/runtime/CL/functions/CLColorConvert.cpp +++ b/src/runtime/CL/functions/CLColorConvert.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLColorConvert.h" -#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h" +#include "src/core/CL/kernels/CLColorConvertKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLComparison.cpp b/src/runtime/CL/functions/CLComparison.cpp index 8c18b35583..9b5840aa95 100644 --- a/src/runtime/CL/functions/CLComparison.cpp +++ b/src/runtime/CL/functions/CLComparison.cpp @@ -24,8 +24,9 @@ #include "arm_compute/runtime/CL/functions/CLComparison.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLComparisonKernel.h" #include "arm_compute/core/Types.h" +#include "src/core/CL/kernels/CLComparisonKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" #include "support/MemorySupport.h" namespace arm_compute @@ -47,7 +48,7 @@ void CLComparison::configure(const CLCompileContext &compile_context, ICLTensor if(broadcasted_info->info()->dimension(0) == 1) { - _border_handler.configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); + _border_handler->configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); } } } @@ -76,7 +77,7 @@ void CLComparisonStatic::configure(const CLCompileContext &compile_context, if(broadcasted_info->info()->dimension(0) == 1) { - _border_handler.configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); + _border_handler->configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); } } } diff --git a/src/runtime/CL/functions/CLComputeAllAnchors.cpp b/src/runtime/CL/functions/CLComputeAllAnchors.cpp index be86fc4f78..2cae0ee455 100644 --- a/src/runtime/CL/functions/CLComputeAllAnchors.cpp +++ b/src/runtime/CL/functions/CLComputeAllAnchors.cpp @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ #include "arm_compute/runtime/CL/functions/CLComputeAllAnchors.h" +#include "src/core/CL/kernels/CLGenerateProposalsLayerKernel.h" #include "support/MemorySupport.h" diff --git a/src/runtime/CL/functions/CLConcatenateLayer.cpp b/src/runtime/CL/functions/CLConcatenateLayer.cpp index 2eb310b893..54f71f9765 100644 --- a/src/runtime/CL/functions/CLConcatenateLayer.cpp +++ b/src/runtime/CL/functions/CLConcatenateLayer.cpp @@ -23,19 +23,19 @@ */ #include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h" -#include "arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" -#include "arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" -#include "arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h" +#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h" +#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" +#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" +#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" +#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "support/MemorySupport.h" diff --git a/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp b/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp index b291ae5b88..8ecc114343 100644 --- a/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp +++ b/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp @@ -22,6 +22,8 @@ * SOFTWARE. 
*/ #include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h" +#include "src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" #include "support/MemorySupport.h" diff --git a/src/runtime/CL/functions/CLConvolution.cpp b/src/runtime/CL/functions/CLConvolution.cpp index bc962d0052..1ad32d309c 100644 --- a/src/runtime/CL/functions/CLConvolution.cpp +++ b/src/runtime/CL/functions/CLConvolution.cpp @@ -24,7 +24,6 @@ #include "arm_compute/runtime/CL/functions/CLConvolution.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" @@ -32,6 +31,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/ITensorAllocator.h" +#include "src/core/CL/kernels/CLConvolutionKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" #include "support/MemorySupport.h" #include @@ -49,15 +50,20 @@ void CLConvolution3x3::configure(const CLCompileContext &compile_context, ICLTen auto k = arm_compute::support::cpp14::make_unique(); k->configure(compile_context, input, output, conv, scale, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); } template CLConvolutionSquare::CLConvolutionSquare(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler() + : _memory_group(std::move(memory_manager)), _tmp(), _is_separable(false), _kernel_hor(support::cpp14::make_unique>()), + _kernel_vert(support::cpp14::make_unique>()), _kernel(support::cpp14::make_unique>()), + _border_handler(support::cpp14::make_unique()) { } +template +CLConvolutionSquare::~CLConvolutionSquare() = default; + template void CLConvolutionSquare::configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value) @@ -88,35 +94,35 @@ void CLConvolutionSquare::configure(const CLCompileContext &compile scale = calculate_matrix_scale(conv, matrix_size); } - _kernel_hor.configure(compile_context, input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED); - _kernel_vert.configure(compile_context, &_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED, type_pair.second); - _border_handler.configure(compile_context, input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value)); + _kernel_hor->configure(compile_context, input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED); + _kernel_vert->configure(compile_context, &_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED, type_pair.second); + _border_handler->configure(compile_context, input, _kernel_hor->border_size(), border_mode, PixelValue(constant_border_value)); // Allocate intermediate buffer _tmp.allocator()->allocate(); } else { - _kernel.configure(compile_context, input, output, conv, scale, border_mode == BorderMode::UNDEFINED); - _border_handler.configure(compile_context, input, _kernel.border_size(), border_mode, PixelValue(constant_border_value)); + 
_kernel->configure(compile_context, input, output, conv, scale, border_mode == BorderMode::UNDEFINED); + _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); } } template void CLConvolutionSquare::run() { - CLScheduler::get().enqueue(_border_handler); + CLScheduler::get().enqueue(*_border_handler); if(_is_separable) { MemoryGroupResourceScope scope_mg(_memory_group); - CLScheduler::get().enqueue(_kernel_hor, false); - CLScheduler::get().enqueue(_kernel_vert); + CLScheduler::get().enqueue(*_kernel_hor, false); + CLScheduler::get().enqueue(*_kernel_vert); } else { - CLScheduler::get().enqueue(_kernel); + CLScheduler::get().enqueue(*_kernel); } } @@ -135,5 +141,5 @@ void CLConvolutionRectangle::configure(const CLCompileContext &compile_context, auto k = arm_compute::support::cpp14::make_unique(); k->configure(compile_context, input, output, conv, rows, cols, scale, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); } diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp index 85355f0f17..e214bdf0f2 100644 --- a/src/runtime/CL/functions/CLConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp @@ -29,7 +29,6 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" - #include "support/MemorySupport.h" #include @@ -45,6 +44,8 @@ CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr memory_ma { } +CLConvolutionLayer::~CLConvolutionLayer() = default; + void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups) { diff --git a/src/runtime/CL/functions/CLCopy.cpp b/src/runtime/CL/functions/CLCopy.cpp index acdc52d4f7..f7b016a779 100644 --- a/src/runtime/CL/functions/CLCopy.cpp +++ b/src/runtime/CL/functions/CLCopy.cpp @@ -24,11 +24,11 @@ #include "arm_compute/runtime/CL/functions/CLCopy.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLCopyKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" +#include "src/core/CL/kernels/CLCopyKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLCropResize.cpp b/src/runtime/CL/functions/CLCropResize.cpp index 4cf9f13a67..4aaa674c5c 100644 --- a/src/runtime/CL/functions/CLCropResize.cpp +++ b/src/runtime/CL/functions/CLCropResize.cpp @@ -25,6 +25,10 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLCopyKernel.h" +#include "src/core/CL/kernels/CLCropKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLMemsetKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -61,6 +65,8 @@ CLCropResize::CLCropResize() { } +CLCropResize::~CLCropResize() = default; + Status 
CLCropResize::validate(const ITensorInfo *input, ITensorInfo *boxes, ITensorInfo *box_ind, const ITensorInfo *output, Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value) { diff --git a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp index e6717b6d01..6fe231ea6c 100644 --- a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp @@ -28,7 +28,6 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" - #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp b/src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp index eb1fb7fbdf..0cf2ea623f 100644 --- a/src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp +++ b/src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp @@ -27,16 +27,21 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/CLTensor.h" +#include "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h" +#include "src/core/CL/kernels/CLMemsetKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { CLDeconvolutionLayerUpsample::CLDeconvolutionLayerUpsample() // NOLINT - : _upsample(), - _memset(), + : _upsample(support::cpp14::make_unique()), + _memset(support::cpp14::make_unique()), _output(nullptr) { } +CLDeconvolutionLayerUpsample::~CLDeconvolutionLayerUpsample() = default; + Status CLDeconvolutionLayerUpsample::validate(const ITensorInfo *input, const ITensorInfo *output, const PadStrideInfo &info) { return CLDeconvolutionLayerUpsampleKernel::validate(input, output, info); @@ -52,13 +57,13 @@ void CLDeconvolutionLayerUpsample::configure(const CLCompileContext &compile_con ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); _output = output; - _memset.configure(compile_context, _output, PixelValue(0, _output->info()->data_type(), _output->info()->quantization_info())); - _upsample.configure(compile_context, input, _output, info); + _memset->configure(compile_context, _output, PixelValue(0, _output->info()->data_type(), _output->info()->quantization_info())); + _upsample->configure(compile_context, input, _output, info); } void CLDeconvolutionLayerUpsample::run() { - CLScheduler::get().enqueue(_memset, false); - CLScheduler::get().enqueue(_upsample, true); + CLScheduler::get().enqueue(*_memset, false); + CLScheduler::get().enqueue(*_upsample, true); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLDepthConvertLayer.cpp b/src/runtime/CL/functions/CLDepthConvertLayer.cpp index 141eb3fefc..e58c0e5f4c 100644 --- a/src/runtime/CL/functions/CLDepthConvertLayer.cpp +++ b/src/runtime/CL/functions/CLDepthConvertLayer.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h" -#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h" +#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp b/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp index 8571056104..8dbd974ceb 100644 --- a/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp +++ b/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h" -#include "arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h" 
+#include "src/core/CL/kernels/CLDepthToSpaceLayerKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp index bb0db2e7a7..2440384e3b 100644 --- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp @@ -24,13 +24,19 @@ #include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h" #include "support/MemorySupport.h" namespace arm_compute @@ -119,7 +125,7 @@ Status validate_arguments_3x3(const ITensorInfo *input, const ITensorInfo *weigh CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::CLDepthwiseConvolutionLayerGeneric(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), - _dwc_native_kernel(), + _dwc_native_kernel(support::cpp14::make_unique()), _permute_input_to_nhwc(), _permute_weights_to_nhwc(), _permute_output_to_nchw(), @@ -137,6 +143,8 @@ CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::CLDepthwiseConv { } +CLDepthwiseConvolutionLayer::~CLDepthwiseConvolutionLayer() = default; + void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation) { @@ -206,9 +214,9 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::configure( dwc_weights_info.n0 = (depth_multiplier == 1) ? 
8 : 1; DWCKernelInfo dwc_info; dwc_info.activation_info = act_info; - _dwc_native_kernel.configure(compile_context, input_to_use, weights_to_use, biases, output_to_use, - dwc_weights_info, dwc_info, conv_info, depth_multiplier, dilation, - output_multipliers_to_use, output_shifts_to_use); + _dwc_native_kernel->configure(compile_context, input_to_use, weights_to_use, biases, output_to_use, + dwc_weights_info, dwc_info, conv_info, depth_multiplier, dilation, + output_multipliers_to_use, output_shifts_to_use); if(_needs_permute) { @@ -302,7 +310,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::run() { _permute_input_to_nhwc.run(); } - CLScheduler::get().enqueue(_dwc_native_kernel); + CLScheduler::get().enqueue(*_dwc_native_kernel); if(_needs_permute) { _permute_output_to_nchw.run(); @@ -343,11 +351,11 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::prepare() CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::CLDepthwiseConvolutionLayerInternal3x3(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _kernel(nullptr), - _border_handler(), + _border_handler(support::cpp14::make_unique()), _permute_input_to_nchw(), _permute_weights_to_nchw(), _permute_output_to_nhwc(), - _reshape_weights(), + _reshape_weights(support::cpp14::make_unique()), _permuted_input(), _permuted_weights(), _permuted_output(), @@ -378,14 +386,14 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::config // Perform validation step ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_ERROR_THROW_ON(CLDepthwiseConvolutionLayerInternal3x3::validate(input->info(), - weights->info(), - biases != nullptr ? biases->info() : nullptr, - output->info(), - conv_info, - depth_multiplier, - act_info, - gpu_target, - dilation)); + weights->info(), + biases != nullptr ? 
biases->info() : nullptr, + output->info(), + conv_info, + depth_multiplier, + act_info, + gpu_target, + dilation)); const bool is_nhwc = input->info()->data_layout() == DataLayout::NHWC; _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type()); @@ -434,7 +442,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::config { if(_needs_weights_reshape) { - _reshape_weights.configure(compile_context, weights, &_permuted_weights, info); + _reshape_weights->configure(compile_context, weights, &_permuted_weights, info); weights_to_use = &_permuted_weights; } _kernel = arm_compute::support::cpp14::make_unique(); @@ -486,7 +494,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::config { zero_value = PixelValue(static_cast(input->info()->quantization_info().uniform().offset)); } - _border_handler.configure(compile_context, input_to_use, _kernel->border_size(), BorderMode::CONSTANT, zero_value); + _border_handler->configure(compile_context, input_to_use, _kernel->border_size(), BorderMode::CONSTANT, zero_value); } Status CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, @@ -505,7 +513,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::run() { _permute_input_to_nchw.run(); } - CLScheduler::get().enqueue(_border_handler); + CLScheduler::get().enqueue(*_border_handler); CLScheduler::get().enqueue(*_kernel); if(_needs_permute) @@ -547,7 +555,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::prepar ARM_COMPUTE_ERROR_ON(_needs_permute); ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); _permuted_weights.allocator()->allocate(); - CLScheduler::get().enqueue(_reshape_weights); + CLScheduler::get().enqueue(*_reshape_weights); _original_weights->mark_as_unused(); } _is_prepared = true; @@ -567,7 +575,7 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w void CLDepthwiseConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier, + unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation) { const GPUTarget gpu_target = CLScheduler::get().target(); diff --git a/src/runtime/CL/functions/CLDequantizationLayer.cpp b/src/runtime/CL/functions/CLDequantizationLayer.cpp index 66ac58ef95..6d63463906 100644 --- a/src/runtime/CL/functions/CLDequantizationLayer.cpp +++ b/src/runtime/CL/functions/CLDequantizationLayer.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLDequantizationLayer.h" -#include "arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h" +#include "src/core/CL/kernels/CLDequantizationLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/CL/functions/CLDerivative.cpp b/src/runtime/CL/functions/CLDerivative.cpp index 7138281f87..a2b883ad28 100644 --- a/src/runtime/CL/functions/CLDerivative.cpp +++ b/src/runtime/CL/functions/CLDerivative.cpp @@ -23,8 +23,9 @@ */ #include "arm_compute/runtime/CL/functions/CLDerivative.h" -#include "arm_compute/core/CL/kernels/CLDerivativeKernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/CL/kernels/CLDerivativeKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" #include "support/MemorySupport.h" 
#include @@ -41,5 +42,5 @@ void CLDerivative::configure(const CLCompileContext &compile_context, ICLTensor auto k = arm_compute::support::cpp14::make_unique(); k->configure(compile_context, input, output_x, output_y, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value)); + _border_handler->configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value)); } diff --git a/src/runtime/CL/functions/CLDilate.cpp b/src/runtime/CL/functions/CLDilate.cpp index 27acf9f7cc..c3d5f8845f 100644 --- a/src/runtime/CL/functions/CLDilate.cpp +++ b/src/runtime/CL/functions/CLDilate.cpp @@ -23,8 +23,9 @@ */ #include "arm_compute/runtime/CL/functions/CLDilate.h" -#include "arm_compute/core/CL/kernels/CLDilateKernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/CL/kernels/CLDilateKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" #include "support/MemorySupport.h" #include @@ -41,5 +42,5 @@ void CLDilate::configure(const CLCompileContext &compile_context, ICLTensor *inp auto k = arm_compute::support::cpp14::make_unique(); k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value)); + _border_handler->configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value)); } diff --git a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp index 07e7a18941..bff882c28b 100644 --- a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp @@ -24,19 +24,24 @@ #include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLDirectConvolutionLayerKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "support/MemorySupport.h" using namespace arm_compute; CLDirectConvolutionLayer::CLDirectConvolutionLayer() - : _direct_conv_kernel(), _input_border_handler(), _activationlayer_function(), _is_activationlayer_enabled(false) + : _direct_conv_kernel(support::cpp14::make_unique()), _input_border_handler(support::cpp14::make_unique()), _activationlayer_function(), + _is_activationlayer_enabled(false) { } +CLDirectConvolutionLayer::~CLDirectConvolutionLayer() = default; + void CLDirectConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) { configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, act_info); @@ -47,10 +52,10 @@ void CLDirectConvolutionLayer::configure(const CLCompileContext &compile_context const ActivationLayerInfo &act_info) { // Set GPU target - _direct_conv_kernel.set_target(CLScheduler::get().target()); + _direct_conv_kernel->set_target(CLScheduler::get().target()); // Configure direct convolution - _direct_conv_kernel.configure(compile_context, input, weights, biases, output, conv_info); + 
_direct_conv_kernel->configure(compile_context, input, weights, biases, output, conv_info); // Configure border handler PixelValue &&zero_value(0.f); @@ -58,10 +63,10 @@ void CLDirectConvolutionLayer::configure(const CLCompileContext &compile_context { zero_value = PixelValue(0, input->info()->data_type(), input->info()->quantization_info()); } - _input_border_handler.configure(compile_context, input, _direct_conv_kernel.border_size(), BorderMode::CONSTANT, zero_value); + _input_border_handler->configure(compile_context, input, _direct_conv_kernel->border_size(), BorderMode::CONSTANT, zero_value); // Tune kernels - CLScheduler::get().tune_kernel_static(_direct_conv_kernel); + CLScheduler::get().tune_kernel_static(*_direct_conv_kernel); _is_activationlayer_enabled = act_info.enabled(); @@ -86,10 +91,10 @@ Status CLDirectConvolutionLayer::validate(const ITensorInfo *input, const ITenso void CLDirectConvolutionLayer::run() { // Run border handler - CLScheduler::get().enqueue(_input_border_handler, false); + CLScheduler::get().enqueue(*_input_border_handler, false); // Run direct convolution - CLScheduler::get().enqueue(_direct_conv_kernel); + CLScheduler::get().enqueue(*_direct_conv_kernel); //Run Activation Layer if(_is_activationlayer_enabled) diff --git a/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp index 0ffafa0221..0e3109439e 100644 --- a/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp @@ -23,11 +23,17 @@ */ #include "arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" +#include "src/core/CL/kernels/CLMemsetKernel.h" +#include "src/core/CL/kernels/CLWeightsReshapeKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include diff --git a/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp b/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp index de94255b48..35ed97d381 100644 --- a/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp +++ b/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h" -#include "arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h" +#include "src/core/CL/kernels/CLElementWiseUnaryLayerKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLElementwiseOperations.cpp b/src/runtime/CL/functions/CLElementwiseOperations.cpp index 7b4d3c629d..736cf973a1 100644 --- a/src/runtime/CL/functions/CLElementwiseOperations.cpp +++ b/src/runtime/CL/functions/CLElementwiseOperations.cpp @@ -24,8 +24,8 @@ #include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLElementwiseOperationKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLEqualizeHistogram.cpp b/src/runtime/CL/functions/CLEqualizeHistogram.cpp 
index a1158a71a5..cc927a055b 100644 --- a/src/runtime/CL/functions/CLEqualizeHistogram.cpp +++ b/src/runtime/CL/functions/CLEqualizeHistogram.cpp @@ -28,6 +28,9 @@ #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLHistogramKernel.h" +#include "src/core/CL/kernels/CLTableLookupKernel.h" +#include "support/MemorySupport.h" #include #include @@ -83,10 +86,17 @@ void calculate_cum_dist_and_lut(CLDistribution1D &dist, CLDistribution1D &cum_di } // namespace CLEqualizeHistogram::CLEqualizeHistogram() - : _histogram_kernel(), _border_histogram_kernel(), _map_histogram_kernel(), _hist(nr_bins, 0, max_range), _cum_dist(nr_bins, 0, max_range), _cd_lut(nr_bins, DataType::U8) + : _histogram_kernel(support::cpp14::make_unique()), + _border_histogram_kernel(support::cpp14::make_unique()), + _map_histogram_kernel(support::cpp14::make_unique()), + _hist(nr_bins, 0, max_range), + _cum_dist(nr_bins, 0, max_range), + _cd_lut(nr_bins, DataType::U8) { } +CLEqualizeHistogram::~CLEqualizeHistogram() = default; + void CLEqualizeHistogram::configure(const ICLImage *input, ICLImage *output) { configure(CLKernelLibrary::get().get_compile_context(), input, output); @@ -94,22 +104,22 @@ void CLEqualizeHistogram::configure(const ICLImage *input, ICLImage *output) void CLEqualizeHistogram::configure(const CLCompileContext &compile_context, const ICLImage *input, ICLImage *output) { - _histogram_kernel.configure(compile_context, input, &_hist); - _border_histogram_kernel.configure(compile_context, input, &_hist); - _map_histogram_kernel.configure(compile_context, input, &_cd_lut, output); + _histogram_kernel->configure(compile_context, input, &_hist); + _border_histogram_kernel->configure(compile_context, input, &_hist); + _map_histogram_kernel->configure(compile_context, input, &_cd_lut, output); } void CLEqualizeHistogram::run() { // Calculate histogram of input. - CLScheduler::get().enqueue(_histogram_kernel, false); + CLScheduler::get().enqueue(*_histogram_kernel, false); // Calculate remaining pixels when image is not multiple of the elements of histogram kernel - CLScheduler::get().enqueue(_border_histogram_kernel, false); + CLScheduler::get().enqueue(*_border_histogram_kernel, false); // Calculate cumulative distribution of histogram and create LUT. calculate_cum_dist_and_lut(_hist, _cum_dist, _cd_lut); // Map input to output using created LUT. 
- CLScheduler::get().enqueue(_map_histogram_kernel); + CLScheduler::get().enqueue(*_map_histogram_kernel); } diff --git a/src/runtime/CL/functions/CLErode.cpp b/src/runtime/CL/functions/CLErode.cpp index 5236f620f1..6880c4845a 100644 --- a/src/runtime/CL/functions/CLErode.cpp +++ b/src/runtime/CL/functions/CLErode.cpp @@ -23,8 +23,9 @@ */ #include "arm_compute/runtime/CL/functions/CLErode.h" -#include "arm_compute/core/CL/kernels/CLErodeKernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/CL/kernels/CLErodeKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" #include "support/MemorySupport.h" #include @@ -41,5 +42,5 @@ void CLErode::configure(const CLCompileContext &compile_context, ICLTensor *inpu auto k = arm_compute::support::cpp14::make_unique(); k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value)); + _border_handler->configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value)); } diff --git a/src/runtime/CL/functions/CLFFT1D.cpp b/src/runtime/CL/functions/CLFFT1D.cpp index 1269cba90d..a0078689ff 100644 --- a/src/runtime/CL/functions/CLFFT1D.cpp +++ b/src/runtime/CL/functions/CLFFT1D.cpp @@ -26,15 +26,28 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLFFTDigitReverseKernel.h" +#include "src/core/CL/kernels/CLFFTRadixStageKernel.h" +#include "src/core/CL/kernels/CLFFTScaleKernel.h" #include "src/core/utils/helpers/fft.h" +#include "support/MemorySupport.h" namespace arm_compute { CLFFT1D::CLFFT1D(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _digit_reverse_kernel(), _fft_kernels(), _scale_kernel(), _digit_reversed_input(), _digit_reverse_indices(), _num_ffts(0), _run_scale(false) + : _memory_group(std::move(memory_manager)), + _digit_reverse_kernel(support::cpp14::make_unique()), + _fft_kernels(), + _scale_kernel(support::cpp14::make_unique()), + _digit_reversed_input(), + _digit_reverse_indices(), + _num_ffts(0), + _run_scale(false) { } +CLFFT1D::~CLFFT1D() = default; + void CLFFT1D::configure(const ICLTensor *input, ICLTensor *output, const FFT1DInfo &config) { configure(CLKernelLibrary::get().get_compile_context(), input, output, config); @@ -62,12 +75,12 @@ void CLFFT1D::configure(const CLCompileContext &compile_context, const ICLTensor TensorInfo digit_reverse_indices_info(TensorShape(input->info()->tensor_shape()[config.axis]), 1, DataType::U32); _digit_reverse_indices.allocator()->init(digit_reverse_indices_info); _memory_group.manage(&_digit_reversed_input); - _digit_reverse_kernel.configure(compile_context, input, &_digit_reversed_input, &_digit_reverse_indices, digit_reverse_config); + _digit_reverse_kernel->configure(compile_context, input, &_digit_reversed_input, &_digit_reverse_indices, digit_reverse_config); // Create and configure FFT kernels unsigned int Nx = 1; _num_ffts = decomposed_vector.size(); - _fft_kernels.resize(_num_ffts); + _fft_kernels.reserve(_num_ffts); for(unsigned int i = 0; i < _num_ffts; ++i) { const unsigned int radix_for_stage = decomposed_vector.at(i); @@ -77,7 +90,8 @@ void CLFFT1D::configure(const CLCompileContext &compile_context, const ICLTensor fft_kernel_info.radix = radix_for_stage; fft_kernel_info.Nx = Nx; fft_kernel_info.is_first_stage = (i == 0); - 
_fft_kernels[i].configure(compile_context, &_digit_reversed_input, ((i == (_num_ffts - 1)) && !is_c2r) ? output : nullptr, fft_kernel_info); + _fft_kernels.emplace_back(support::cpp14::make_unique()); + _fft_kernels.back()->configure(compile_context, &_digit_reversed_input, ((i == (_num_ffts - 1)) && !is_c2r) ? output : nullptr, fft_kernel_info); Nx *= radix_for_stage; } @@ -88,7 +102,7 @@ void CLFFT1D::configure(const CLCompileContext &compile_context, const ICLTensor FFTScaleKernelInfo scale_config; scale_config.scale = static_cast(N); scale_config.conjugate = config.direction == FFTDirection::Inverse; - is_c2r ? _scale_kernel.configure(compile_context, &_digit_reversed_input, output, scale_config) : _scale_kernel.configure(output, nullptr, scale_config); + is_c2r ? _scale_kernel->configure(compile_context, &_digit_reversed_input, output, scale_config) : _scale_kernel->configure(output, nullptr, scale_config); } // Allocate tensors @@ -132,18 +146,18 @@ void CLFFT1D::run() MemoryGroupResourceScope scope_mg(_memory_group); // Run digit reverse - CLScheduler::get().enqueue(_digit_reverse_kernel, false); + CLScheduler::get().enqueue(*_digit_reverse_kernel, false); // Run radix kernels for(unsigned int i = 0; i < _num_ffts; ++i) { - CLScheduler::get().enqueue(_fft_kernels[i], i == (_num_ffts - 1) && !_run_scale); + CLScheduler::get().enqueue(*_fft_kernels[i], i == (_num_ffts - 1) && !_run_scale); } // Run output scaling if(_run_scale) { - CLScheduler::get().enqueue(_scale_kernel, true); + CLScheduler::get().enqueue(*_scale_kernel, true); } } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLFFT2D.cpp b/src/runtime/CL/functions/CLFFT2D.cpp index 7ab852fa98..1d444bb15d 100644 --- a/src/runtime/CL/functions/CLFFT2D.cpp +++ b/src/runtime/CL/functions/CLFFT2D.cpp @@ -26,6 +26,9 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLFFTDigitReverseKernel.h" +#include "src/core/CL/kernels/CLFFTRadixStageKernel.h" +#include "src/core/CL/kernels/CLFFTScaleKernel.h" namespace arm_compute { @@ -34,6 +37,8 @@ CLFFT2D::CLFFT2D(std::shared_ptr memory_manager) { } +CLFFT2D::~CLFFT2D() = default; + void CLFFT2D::configure(const ICLTensor *input, ICLTensor *output, const FFT2DInfo &config) { configure(CLKernelLibrary::get().get_compile_context(), input, output, config); diff --git a/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp b/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp index 4d0eab81ee..5472e8469f 100644 --- a/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp @@ -29,6 +29,13 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CPP/CPPScheduler.h" +#include "src/core/CL/kernels/CLCopyKernel.h" +#include "src/core/CL/kernels/CLFFTDigitReverseKernel.h" +#include "src/core/CL/kernels/CLFFTRadixStageKernel.h" +#include "src/core/CL/kernels/CLFFTScaleKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLPadLayerKernel.h" +#include "src/core/CL/kernels/CLReductionOperationKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/utils/helpers/fft.h" diff --git a/src/runtime/CL/functions/CLFastCorners.cpp b/src/runtime/CL/functions/CLFastCorners.cpp index 97f853fdea..110d2c3639 100644 --- a/src/runtime/CL/functions/CLFastCorners.cpp +++ 
b/src/runtime/CL/functions/CLFastCorners.cpp @@ -24,12 +24,14 @@ #include "arm_compute/runtime/CL/functions/CLFastCorners.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/ITensorAllocator.h" +#include "src/core/CL/kernels/CLFastCornersKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "support/MemorySupport.h" #include #include @@ -38,9 +40,9 @@ using namespace arm_compute; CLFastCorners::CLFastCorners(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), - _fast_corners_kernel(), + _fast_corners_kernel(support::cpp14::make_unique()), _suppr_func(), - _copy_array_kernel(), + _copy_array_kernel(support::cpp14::make_unique()), _output(), _suppr(), _win(), @@ -52,6 +54,8 @@ CLFastCorners::CLFastCorners(std::shared_ptr memory_manager) { } +CLFastCorners::~CLFastCorners() = default; + void CLFastCorners::configure(const ICLImage *input, float threshold, bool nonmax_suppression, ICLKeyPointArray *corners, unsigned int *num_corners, BorderMode border_mode, uint8_t constant_border_value) { @@ -78,11 +82,11 @@ void CLFastCorners::configure(const CLCompileContext &compile_context, const ICL const bool update_number = (nullptr != _num_corners); _memory_group.manage(&_output); - _fast_corners_kernel.configure(compile_context, input, &_output, threshold, nonmax_suppression, border_mode); + _fast_corners_kernel->configure(compile_context, input, &_output, threshold, nonmax_suppression, border_mode); if(!_non_max) { - _copy_array_kernel.configure(compile_context, &_output, update_number, _corners, &_num_buffer); + _copy_array_kernel->configure(compile_context, &_output, update_number, _corners, &_num_buffer); } else { @@ -90,7 +94,7 @@ void CLFastCorners::configure(const CLCompileContext &compile_context, const ICL _memory_group.manage(&_suppr); _suppr_func.configure(compile_context, &_output, &_suppr, border_mode); - _copy_array_kernel.configure(compile_context, &_suppr, update_number, _corners, &_num_buffer); + _copy_array_kernel->configure(compile_context, &_suppr, update_number, _corners, &_num_buffer); _suppr.allocator()->allocate(); } @@ -113,14 +117,14 @@ void CLFastCorners::run() q.enqueueUnmapMemObject(_output.cl_buffer(), out_buffer); } - CLScheduler::get().enqueue(_fast_corners_kernel, false); + CLScheduler::get().enqueue(*_fast_corners_kernel, false); if(_non_max) { _suppr_func.run(); } - CLScheduler::get().enqueue(_copy_array_kernel, false); + CLScheduler::get().enqueue(*_copy_array_kernel, false); unsigned int get_num_corners = 0; q.enqueueReadBuffer(_num_buffer, CL_TRUE, 0, sizeof(unsigned int), &get_num_corners); diff --git a/src/runtime/CL/functions/CLFill.cpp b/src/runtime/CL/functions/CLFill.cpp index a89383ec31..855ed8380a 100644 --- a/src/runtime/CL/functions/CLFill.cpp +++ b/src/runtime/CL/functions/CLFill.cpp @@ -23,8 +23,8 @@ */ #include "arm_compute/runtime/CL/functions/CLFill.h" -#include "arm_compute/core/CL/kernels/CLMemsetKernel.h" #include "arm_compute/core/Types.h" +#include "src/core/CL/kernels/CLMemsetKernel.h" #include "support/MemorySupport.h" diff --git a/src/runtime/CL/functions/CLFillBorder.cpp b/src/runtime/CL/functions/CLFillBorder.cpp index c647bb6a02..27d132b842 100644 --- a/src/runtime/CL/functions/CLFillBorder.cpp +++ b/src/runtime/CL/functions/CLFillBorder.cpp @@ -23,7 
+23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLFillBorder.h" -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLFlattenLayer.cpp b/src/runtime/CL/functions/CLFlattenLayer.cpp index a826541017..0646a0d3a0 100644 --- a/src/runtime/CL/functions/CLFlattenLayer.cpp +++ b/src/runtime/CL/functions/CLFlattenLayer.cpp @@ -23,8 +23,8 @@ */ #include "arm_compute/runtime/CL/functions/CLFlattenLayer.h" -#include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLFlattenLayerKernel.h" #include "support/MemorySupport.h" using namespace arm_compute; diff --git a/src/runtime/CL/functions/CLFloor.cpp b/src/runtime/CL/functions/CLFloor.cpp index 7ed92ac3df..770e6a3781 100644 --- a/src/runtime/CL/functions/CLFloor.cpp +++ b/src/runtime/CL/functions/CLFloor.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLFloor.h" -#include "arm_compute/core/CL/kernels/CLFloorKernel.h" +#include "src/core/CL/kernels/CLFloorKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp index 75e87c382b..1796443ca5 100644 --- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -28,6 +28,19 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" +#include "src/core/CL/kernels/CLTransposeKernel.h" #include "support/Cast.h" #include "support/MemorySupport.h" diff --git a/src/runtime/CL/functions/CLFuseBatchNormalization.cpp b/src/runtime/CL/functions/CLFuseBatchNormalization.cpp index 825267c0fc..f018e5a8ae 100644 --- a/src/runtime/CL/functions/CLFuseBatchNormalization.cpp +++ b/src/runtime/CL/functions/CLFuseBatchNormalization.cpp @@ -28,14 +28,18 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLFuseBatchNormalizationKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { CLFuseBatchNormalization::CLFuseBatchNormalization() - : _fuse_bn_kernel() + : _fuse_bn_kernel(support::cpp14::make_unique()) { } +CLFuseBatchNormalization::~CLFuseBatchNormalization() = default; + void CLFuseBatchNormalization::configure(const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias, const ICLTensor *input_bias, const 
ICLTensor *bn_beta, const ICLTensor *bn_gamma, @@ -49,7 +53,7 @@ void CLFuseBatchNormalization::configure(const CLCompileContext &compile_context const ICLTensor *input_bias, const ICLTensor *bn_beta, const ICLTensor *bn_gamma, float epsilon, FuseBatchNormalizationType fbn_type) { - _fuse_bn_kernel.configure(compile_context, input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type); + _fuse_bn_kernel->configure(compile_context, input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type); } Status CLFuseBatchNormalization::validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var, @@ -62,6 +66,6 @@ Status CLFuseBatchNormalization::validate(const ITensorInfo *input_weights, cons void CLFuseBatchNormalization::run() { - CLScheduler::get().enqueue(_fuse_bn_kernel, true); + CLScheduler::get().enqueue(*_fuse_bn_kernel, true); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLGEMM.cpp b/src/runtime/CL/functions/CLGEMM.cpp index 80c5496ede..0151485849 100644 --- a/src/runtime/CL/functions/CLGEMM.cpp +++ b/src/runtime/CL/functions/CLGEMM.cpp @@ -23,6 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLGEMM.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/GPUTarget.h" @@ -38,6 +39,11 @@ #include "src/core/CL/ICLGEMMKernelConfiguration.h" #include "src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h" #include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/utils/helpers/float_ops.h" #include "src/runtime/CL/gemm/CLGEMMKernelSelection.h" @@ -51,16 +57,58 @@ using namespace arm_compute::misc::shape_calculator; using namespace arm_compute::cl_gemm; using namespace arm_compute::utils::cast; +namespace weights_transformations +{ +CLGEMMReshapeRHSMatrixKernelManaged::CLGEMMReshapeRHSMatrixKernelManaged() + : _kernel(support::cpp14::make_unique()) +{ +} + +CLGEMMReshapeRHSMatrixKernelManaged::~CLGEMMReshapeRHSMatrixKernelManaged() = default; + +void CLGEMMReshapeRHSMatrixKernelManaged::run() +{ + _output.allocator()->allocate(); + CLScheduler::get().enqueue(*_kernel, false); + _reshape_run = true; +} + +void CLGEMMReshapeRHSMatrixKernelManaged::release() +{ + _output.allocator()->free(); +} + +ICLTensor *CLGEMMReshapeRHSMatrixKernelManaged::get_weights() +{ + return &_output; +} + +uint32_t CLGEMMReshapeRHSMatrixKernelManaged::uid() +{ + return _uid; +} + +void CLGEMMReshapeRHSMatrixKernelManaged::configure(const ICLTensor *input, GEMMRHSMatrixInfo info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, info); +} + +void CLGEMMReshapeRHSMatrixKernelManaged::configure(const CLCompileContext &compile_context, const ICLTensor *input, GEMMRHSMatrixInfo info) +{ + _kernel->configure(compile_context, input, &_output, info); +} +} // namespace weights_transformations + CLGEMM::CLGEMM(std::shared_ptr memory_manager, IWeightsManager *weights_manager) : _memory_group(std::move(memory_manager)), 
_weights_manager(weights_manager), - _mm_kernel(), - _reshape_lhs_kernel(), - _reshape_rhs_kernel(), - _reshape_rhs_kernel_managed(), - _mm_reshaped_kernel(), - _mm_reshaped_only_rhs_kernel(), - _mm_reshaped_only_rhs_fallback_kernel(), + _mm_kernel(support::cpp14::make_unique()), + _reshape_lhs_kernel(support::cpp14::make_unique()), + _reshape_rhs_kernel(support::cpp14::make_unique()), + _reshape_rhs_kernel_managed(support::cpp14::make_unique()), + _mm_reshaped_kernel(support::cpp14::make_unique()), + _mm_reshaped_only_rhs_kernel(support::cpp14::make_unique()), + _mm_reshaped_only_rhs_fallback_kernel(support::cpp14::make_unique()), _tmp_a(), _tmp_b(), _original_b(nullptr), @@ -73,6 +121,8 @@ CLGEMM::CLGEMM(std::shared_ptr memory_manager, IWeightsManager * { } +CLGEMM::~CLGEMM() = default; + CLGEMMKernelType CLGEMM::select_gemm_kernel(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type, bool reshape_b_only_on_first_run) { std::unique_ptr gemm_kernel = CLGEMMKernelSelectionFactory::create(CLScheduler::get().target()); @@ -98,15 +148,15 @@ void CLGEMM::configure_native_v1(const CLCompileContext &compile_context, const const GPUTarget gpu_target = CLScheduler::get().target(); // Set the target for the kernels - _mm_kernel.set_target(gpu_target); + _mm_kernel->set_target(gpu_target); GEMMReshapeInfo reshape_info(m, n, k, 1, 1, gemm_info.depth_output_gemm3d(), gemm_info.reinterpret_input_as_3d(), gemm_info.broadcast_bias()); // Configure and tune matrix multiply kernel - _mm_kernel.configure(compile_context, a, b, c, output, alpha, beta, false, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info()); + _mm_kernel->configure(compile_context, a, b, c, output, alpha, beta, false, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info()); // Tune kernel statically - CLScheduler::get().tune_kernel_static(_mm_kernel); + CLScheduler::get().tune_kernel_static(*_mm_kernel); } void CLGEMM::configure_reshaped_v1(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, @@ -122,8 +172,8 @@ void CLGEMM::configure_reshaped_v1(const CLCompileContext &compile_context, cons int mult_interleave4x4_height = 1; // Set the target for the kernels - _reshape_lhs_kernel.set_target(gpu_target); - _mm_kernel.set_target(gpu_target); + _reshape_lhs_kernel->set_target(gpu_target); + _mm_kernel->set_target(gpu_target); if(get_arch_from_target(gpu_target) == GPUTarget::BIFROST) { @@ -158,24 +208,24 @@ void CLGEMM::configure_reshaped_v1(const CLCompileContext &compile_context, cons } // Configure interleave kernel - _reshape_lhs_kernel.configure(compile_context, a, &_tmp_a, lhs_info, reinterpret_input_as_3d); + _reshape_lhs_kernel->configure(compile_context, a, &_tmp_a, lhs_info, reinterpret_input_as_3d); // Configure transpose kernel ICLTensor *reshaped_rhs = &_tmp_b; if(_weights_manager && _weights_manager->are_weights_managed(b)) { - _reshape_rhs_kernel_managed.configure(compile_context, b, rhs_info); - reshaped_rhs = utils::cast::polymorphic_downcast(_weights_manager->acquire(b, &_reshape_rhs_kernel_managed)); + _reshape_rhs_kernel_managed->configure(compile_context, b, rhs_info); + reshaped_rhs = utils::cast::polymorphic_downcast(_weights_manager->acquire(b, _reshape_rhs_kernel_managed.get())); } else { - _reshape_rhs_kernel.configure(compile_context, b, &_tmp_b, rhs_info); + _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info); } // Configure and tune 
matrix multiply kernel - _mm_kernel.configure(compile_context, &_tmp_a, reshaped_rhs, c, output, alpha, beta, true, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info()); + _mm_kernel->configure(compile_context, &_tmp_a, reshaped_rhs, c, output, alpha, beta, true, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info()); - CLScheduler::get().tune_kernel_static(_mm_kernel); + CLScheduler::get().tune_kernel_static(*_mm_kernel); // Allocate intermediate tensors _tmp_a.allocator()->allocate(); @@ -209,8 +259,8 @@ void CLGEMM::configure_reshaped_v2(const CLCompileContext &compile_context, cons kernel_info.activation_info = gemm_info.activation_info(); // Set the target for the kernels - _reshape_lhs_kernel.set_target(gpu_target); - _mm_kernel.set_target(gpu_target); + _reshape_lhs_kernel->set_target(gpu_target); + _mm_kernel->set_target(gpu_target); const bool use_mm_b = (!_weights_manager || !_weights_manager->are_weights_managed(b)); @@ -234,21 +284,21 @@ void CLGEMM::configure_reshaped_v2(const CLCompileContext &compile_context, cons // Configure lhs_info and rhs_info std::tie(lhs_info, rhs_info) = gemm_config->configure(m, n, k, batch_size, data_type); - _reshape_lhs_kernel.configure(compile_context, a, &_tmp_a, lhs_info, gemm_info.reinterpret_input_as_3d()); + _reshape_lhs_kernel->configure(compile_context, a, &_tmp_a, lhs_info, gemm_info.reinterpret_input_as_3d()); ICLTensor *reshaped_rhs = &_tmp_b; if(_weights_manager && _weights_manager->are_weights_managed(b)) { - _reshape_rhs_kernel_managed.configure(compile_context, b, rhs_info); - reshaped_rhs = utils::cast::polymorphic_downcast(_weights_manager->acquire(b, &_reshape_rhs_kernel_managed)); + _reshape_rhs_kernel_managed->configure(compile_context, b, rhs_info); + reshaped_rhs = utils::cast::polymorphic_downcast(_weights_manager->acquire(b, _reshape_rhs_kernel_managed.get())); } else { - _reshape_rhs_kernel.configure(compile_context, b, &_tmp_b, rhs_info); + _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info); } // Configure and tune matrix multiply kernel - _mm_reshaped_kernel.configure(compile_context, &_tmp_a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info); + _mm_reshaped_kernel->configure(compile_context, &_tmp_a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info); // Allocate intermediate tensors _tmp_a.allocator()->allocate(); @@ -282,7 +332,7 @@ void CLGEMM::configure_reshaped_only_rhs(const CLCompileContext &compile_context kernel_info.activation_info = gemm_info.activation_info(); // Set the target for the kernels - _mm_kernel.set_target(gpu_target); + _mm_kernel->set_target(gpu_target); const bool use_mm_b = (!_weights_manager || !_weights_manager->are_weights_managed(b)); @@ -305,12 +355,12 @@ void CLGEMM::configure_reshaped_only_rhs(const CLCompileContext &compile_context ICLTensor *reshaped_rhs = &_tmp_b; if(_weights_manager && _weights_manager->are_weights_managed(b)) { - _reshape_rhs_kernel_managed.configure(compile_context, b, rhs_info); - reshaped_rhs = utils::cast::polymorphic_downcast(_weights_manager->acquire(b, &_reshape_rhs_kernel_managed)); + _reshape_rhs_kernel_managed->configure(compile_context, b, rhs_info); + reshaped_rhs = utils::cast::polymorphic_downcast(_weights_manager->acquire(b, _reshape_rhs_kernel_managed.get())); } else { - _reshape_rhs_kernel.configure(compile_context, b, &_tmp_b, rhs_info); + _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info); } // Configure two variants of 
CLGEMMMatrixMultiplyReshapedOnlyRHSKernel (has_pad_y = false/true) @@ -319,11 +369,11 @@ void CLGEMM::configure_reshaped_only_rhs(const CLCompileContext &compile_context // Configure matrix multiply kernel with no y padding support kernel_info.has_pad_y = false; - _mm_reshaped_only_rhs_kernel.configure(compile_context, a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info); + _mm_reshaped_only_rhs_kernel->configure(compile_context, a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info); // Configure matrix multiply kernel with y padding support kernel_info.has_pad_y = true; - _mm_reshaped_only_rhs_fallback_kernel.configure(compile_context, a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info); + _mm_reshaped_only_rhs_fallback_kernel->configure(compile_context, a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info); if(!_reshape_b_only_on_first_run && use_mm_b) { @@ -626,49 +676,49 @@ void CLGEMM::run() { case CLGEMMKernelType::NATIVE_V1: { - CLScheduler::get().enqueue(_mm_kernel, true); + CLScheduler::get().enqueue(*_mm_kernel, true); break; } case CLGEMMKernelType::RESHAPED_V1: { // Run interleave kernel - CLScheduler::get().enqueue(_reshape_lhs_kernel, false); + CLScheduler::get().enqueue(*_reshape_lhs_kernel, false); if(!_reshape_b_only_on_first_run) { // Run transpose kernel if(_weights_manager && _weights_manager->are_weights_managed(_original_b)) { - _weights_manager->run(_original_b, &_reshape_rhs_kernel_managed); + _weights_manager->run(_original_b, _reshape_rhs_kernel_managed.get()); } else { - CLScheduler::get().enqueue(_reshape_rhs_kernel, false); + CLScheduler::get().enqueue(*_reshape_rhs_kernel, false); } } - CLScheduler::get().enqueue(_mm_kernel, true); + CLScheduler::get().enqueue(*_mm_kernel, true); break; } case CLGEMMKernelType::RESHAPED: { // Run interleave kernel - CLScheduler::get().enqueue(_reshape_lhs_kernel, false); + CLScheduler::get().enqueue(*_reshape_lhs_kernel, false); if(!_reshape_b_only_on_first_run) { // Run transpose kernel if(_weights_manager && _weights_manager->are_weights_managed(_original_b)) { - _weights_manager->run(_original_b, &_reshape_rhs_kernel_managed); + _weights_manager->run(_original_b, _reshape_rhs_kernel_managed.get()); } else { - CLScheduler::get().enqueue(_reshape_rhs_kernel, false); + CLScheduler::get().enqueue(*_reshape_rhs_kernel, false); } } - CLScheduler::get().enqueue(_mm_reshaped_kernel, true); + CLScheduler::get().enqueue(*_mm_reshaped_kernel, true); break; } case CLGEMMKernelType::RESHAPED_ONLY_RHS: @@ -678,20 +728,20 @@ void CLGEMM::run() // Run transpose kernel if(_weights_manager && _weights_manager->are_weights_managed(_original_b)) { - _weights_manager->run(_original_b, &_reshape_rhs_kernel_managed); + _weights_manager->run(_original_b, _reshape_rhs_kernel_managed.get()); } else { - CLScheduler::get().enqueue(_reshape_rhs_kernel, false); + CLScheduler::get().enqueue(*_reshape_rhs_kernel, false); } } if(_has_pad_y) { - CLScheduler::get().enqueue(_mm_reshaped_only_rhs_fallback_kernel, true); + CLScheduler::get().enqueue(*_mm_reshaped_only_rhs_fallback_kernel, true); } else { - CLScheduler::get().enqueue(_mm_reshaped_only_rhs_kernel, true); + CLScheduler::get().enqueue(*_mm_reshaped_only_rhs_kernel, true); } break; } @@ -720,13 +770,13 @@ void CLGEMM::prepare() { if(_weights_manager && _weights_manager->are_weights_managed(_original_b)) { - _weights_manager->run(_original_b, &_reshape_rhs_kernel_managed); + _weights_manager->run(_original_b, 
_reshape_rhs_kernel_managed.get()); } else { // Run transpose kernel and mark original weights tensor as unused _tmp_b.allocator()->allocate(); - CLScheduler::get().enqueue(_reshape_rhs_kernel, false); + CLScheduler::get().enqueue(*_reshape_rhs_kernel, false); _original_b->mark_as_unused(); } } diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp index e871b39805..4d26df5e43 100644 --- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp @@ -30,8 +30,23 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLCol2ImKernel.h" +#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" +#include "src/core/CL/kernels/CLIm2ColKernel.h" +#include "src/core/CL/kernels/CLWeightsReshapeKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "support/Cast.h" +#include "support/MemorySupport.h" #include #include @@ -43,10 +58,12 @@ using namespace arm_compute::misc::shape_calculator; using namespace arm_compute::utils::cast; CLConvolutionLayerReshapeWeights::CLConvolutionLayerReshapeWeights() - : _weights_reshape_kernel() + : _weights_reshape_kernel(support::cpp14::make_unique()) { } +CLConvolutionLayerReshapeWeights::~CLConvolutionLayerReshapeWeights() = default; + void CLConvolutionLayerReshapeWeights::configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups) { configure(CLKernelLibrary::get().get_compile_context(), weights, biases, output, num_groups); @@ -64,7 +81,7 @@ void CLConvolutionLayerReshapeWeights::configure(const CLCompileContext &compile const bool append_biases = (biases != nullptr) && !is_data_type_quantized_asymmetric(weights->info()->data_type()); const ICLTensor *biases_to_use = (append_biases) ? 
biases : nullptr; - _weights_reshape_kernel.configure(compile_context, weights, biases_to_use, output, num_groups); + _weights_reshape_kernel->configure(compile_context, weights, biases_to_use, output, num_groups); output->info()->set_quantization_info(weights->info()->quantization_info()); } @@ -96,16 +113,18 @@ Status CLConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, co void CLConvolutionLayerReshapeWeights::run() { - CLScheduler::get().enqueue(_weights_reshape_kernel); + CLScheduler::get().enqueue(*_weights_reshape_kernel); } CLGEMMConvolutionLayer::CLGEMMConvolutionLayer(std::shared_ptr memory_manager, IWeightsManager *weights_manager) - : _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(), _mm_gemm(memory_manager, weights_manager), - _mm_gemmlowp(memory_manager), _col2im_kernel(), _activationlayer_function(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _skip_im2col(false), - _skip_col2im(false), _is_quantized(false), _fuse_activation(true), _is_prepared(false) + : _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(support::cpp14::make_unique()), + _mm_gemm(memory_manager, weights_manager), _mm_gemmlowp(memory_manager), _col2im_kernel(support::cpp14::make_unique()), _activationlayer_function(), _original_weights(nullptr), + _im2col_output(), _weights_reshaped(), _gemm_output(), _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _fuse_activation(true), _is_prepared(false) { } +CLGEMMConvolutionLayer::~CLGEMMConvolutionLayer() = default; + void CLGEMMConvolutionLayer::configure_mm(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const GEMMLowpOutputStageInfo &gemmlowp_output_stage, int gemm_3d_depth, const ActivationLayerInfo &act_info) @@ -230,8 +249,8 @@ void CLGEMMConvolutionLayer::configure(const CLCompileContext &compile_context, _fuse_activation = true; // Set the GPU target for im2col and col2im - _im2col_kernel.set_target(CLScheduler::get().target()); - _col2im_kernel.set_target(CLScheduler::get().target()); + _im2col_kernel->set_target(CLScheduler::get().target()); + _col2im_kernel->set_target(CLScheduler::get().target()); const ICLTensor *gemm_input_to_use = input; ICLTensor *gemm_output_to_use = output; @@ -293,11 +312,11 @@ void CLGEMMConvolutionLayer::configure(const CLCompileContext &compile_context, _memory_group.manage(&_im2col_output); // Configure and tune im2col. 
im2col output shape is auto-initialized - _im2col_kernel.configure(compile_context, input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation, num_groups); + _im2col_kernel->configure(compile_context, input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation, num_groups); // Set quantization info _im2col_output.info()->set_quantization_info(input->info()->quantization_info()); - CLScheduler::get().tune_kernel_static(_im2col_kernel); + CLScheduler::get().tune_kernel_static(*_im2col_kernel); // Update GEMM input gemm_input_to_use = &_im2col_output; @@ -390,8 +409,8 @@ void CLGEMMConvolutionLayer::configure(const CLCompileContext &compile_context, if(!_skip_col2im) { // Configure and tune Col2Im - _col2im_kernel.configure(compile_context, gemm_output_to_use, output, Size2D(conv_w, conv_h), num_groups); - CLScheduler::get().tune_kernel_static(_col2im_kernel); + _col2im_kernel->configure(compile_context, gemm_output_to_use, output, Size2D(conv_w, conv_h), num_groups); + CLScheduler::get().tune_kernel_static(*_col2im_kernel.get()); } if(!_skip_col2im) @@ -611,7 +630,7 @@ void CLGEMMConvolutionLayer::run() // Run im2col if(!_skip_im2col) { - CLScheduler::get().enqueue(_im2col_kernel); + CLScheduler::get().enqueue(*_im2col_kernel); } // Runs CLGEMM or CLGEMMLowpMatrixMultiplyCore functions @@ -629,7 +648,7 @@ void CLGEMMConvolutionLayer::run() // Reshape output matrix if(!_skip_col2im) { - CLScheduler::get().enqueue(_col2im_kernel, false); + CLScheduler::get().enqueue(*_col2im_kernel.get(), false); } //Run Activation Layer if we cannot fuse in GEMM diff --git a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp index 5fc9c17bef..4d277f0982 100644 --- a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp @@ -28,8 +28,23 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h" +#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" +#include "src/core/CL/kernels/CLIm2ColKernel.h" +#include "src/core/CL/kernels/CLWeightsReshapeKernel.h" +#include "support/MemorySupport.h" -#include #include namespace arm_compute @@ -99,7 +114,7 @@ CLGEMMDeconvolutionLayer::CLGEMMDeconvolutionLayer(std::shared_ptr()), _slice_gemm(), _gemmlowp_final(), _reshaped_weights(), @@ -116,6 +131,8 @@ CLGEMMDeconvolutionLayer::CLGEMMDeconvolutionLayer(std::shared_ptrinfo(), weights->info(), deconv_info); + _deconv_reshape->configure(compile_context, &_gemm_output, bias, deconv_reshape_output, input->info(), weights->info(), 
deconv_info); _gemm_output.allocator()->allocate(); if(_is_quantized) @@ -357,7 +374,7 @@ void CLGEMMDeconvolutionLayer::run() _mm_gemm.run(); } - CLScheduler::get().enqueue(_deconv_reshape, false); + CLScheduler::get().enqueue(*_deconv_reshape, false); if(_is_quantized) { diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp index 7a8de6c1f5..d3d80a39e3 100644 --- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp @@ -35,8 +35,16 @@ #include "arm_compute/runtime/CL/CLScheduler.h" #include "src/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h" #include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h" +#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/runtime/CL/gemm/CLGEMMKernelSelection.h" +#include "support/MemorySupport.h" namespace arm_compute { @@ -71,14 +79,14 @@ inline bool is_gemm_reshaped(unsigned int m, unsigned int n, unsigned int k, Dat CLGEMMLowpMatrixMultiplyCore::CLGEMMLowpMatrixMultiplyCore(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), - _weights_to_qasymm8(), - _mm_native_kernel(), - _mm_reshaped_only_rhs_kernel(), - _mtx_b_reshape_kernel(), - _mtx_a_reduction_kernel(), - _mtx_b_reduction_kernel(), - _offset_contribution_kernel(), - _offset_contribution_output_stage_kernel(), + _weights_to_qasymm8(support::cpp14::make_unique()), + _mm_native_kernel(support::cpp14::make_unique()), + _mm_reshaped_only_rhs_kernel(support::cpp14::make_unique()), + _mtx_b_reshape_kernel(support::cpp14::make_unique()), + _mtx_a_reduction_kernel(support::cpp14::make_unique()), + _mtx_b_reduction_kernel(support::cpp14::make_unique()), + _offset_contribution_kernel(support::cpp14::make_unique()), + _offset_contribution_output_stage_kernel(support::cpp14::make_unique()), _qasymm8_weights(), _vector_sum_col(), _vector_sum_row(), @@ -100,6 +108,8 @@ CLGEMMLowpMatrixMultiplyCore::CLGEMMLowpMatrixMultiplyCore(std::shared_ptrset_target(gpu_target); + _mm_reshaped_only_rhs_kernel->set_target(gpu_target); GEMMRHSMatrixInfo rhs_info; GEMMLHSMatrixInfo lhs_info; @@ -150,7 +160,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con TensorInfo weights_info(*b->info()); weights_info.set_data_type(DataType::QASYMM8); _qasymm8_weights.allocator()->init(weights_info); - _weights_to_qasymm8.configure(compile_context, b, &_qasymm8_weights, ConvertPolicy::WRAP, 0); + _weights_to_qasymm8->configure(compile_context, b, &_qasymm8_weights, ConvertPolicy::WRAP, 0); } const ICLTensor *matrix_b = _convert_to_qasymm8 ? &_qasymm8_weights : b; @@ -168,7 +178,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con std::tie(lhs_info, rhs_info) = CLGEMMReshapedOnlyRHSKernelConfigurationFactory::create(gpu_target)->configure(m, n, k, batch_size, DataType::QASYMM8); // Configure reshape RHS kernel - _mtx_b_reshape_kernel.configure(compile_context, _convert_to_qasymm8 ? 
&_qasymm8_weights : b, &_tmp_b, rhs_info); + _mtx_b_reshape_kernel->configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_tmp_b, rhs_info); } // Using default reduction info @@ -185,7 +195,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con } // Configure Matrix B reduction kernel - _mtx_b_reduction_kernel.configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_vector_sum_col, reduction_info); + _mtx_b_reduction_kernel->configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_vector_sum_col, reduction_info); } // Initialize Matrix A reduction kernel only if _b_offset is not equal to 0 @@ -196,7 +206,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con _memory_group.manage(&_vector_sum_row); // Configure matrix A reduction kernel - _mtx_a_reduction_kernel.configure(compile_context, a, &_vector_sum_row, reduction_info); + _mtx_a_reduction_kernel->configure(compile_context, a, &_vector_sum_row, reduction_info); } GEMMKernelInfo gemm_kernel_info; @@ -226,8 +236,8 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con if(_is_gemm_reshaped && gemmlowp_output_stage.type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT) { // Configure and tune matrix multiply kernel with fused output stage - _mm_reshaped_only_rhs_kernel.configure(compile_context, _matrix_a, matrix_b, output, gemm_kernel_info, _a_offset == 0 ? nullptr : &_vector_sum_col, - _b_offset == 0 ? nullptr : &_vector_sum_row, c, &_gemm_output_stage_multipliers, &_gemm_output_stage_shifts); + _mm_reshaped_only_rhs_kernel->configure(compile_context, _matrix_a, matrix_b, output, gemm_kernel_info, _a_offset == 0 ? nullptr : &_vector_sum_col, + _b_offset == 0 ? nullptr : &_vector_sum_row, c, &_gemm_output_stage_multipliers, &_gemm_output_stage_shifts); } else { @@ -237,7 +247,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con if(_is_gemm_reshaped) { - _mm_reshaped_only_rhs_kernel.configure(compile_context, _matrix_a, matrix_b, &_mm_result_s32, gemm_kernel_info); + _mm_reshaped_only_rhs_kernel->configure(compile_context, _matrix_a, matrix_b, &_mm_result_s32, gemm_kernel_info); } else { @@ -245,11 +255,11 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con std::tie(lhs_info, rhs_info) = CLGEMMNativeKernelConfigurationFactory::create(gpu_target)->configure(m, n, k, batch_size, DataType::QASYMM8); // Configure matrix multiply kernel - _mm_native_kernel.configure(compile_context, _matrix_a, matrix_b, &_mm_result_s32, lhs_info, rhs_info, GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d)); + _mm_native_kernel->configure(compile_context, _matrix_a, matrix_b, &_mm_result_s32, lhs_info, rhs_info, GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d)); - _offset_contribution_output_stage_kernel.configure(compile_context, &_mm_result_s32, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, output, - a->info()->dimension(0), - _a_offset, _b_offset, gemmlowp_output_stage, &_gemm_output_stage_multipliers, &_gemm_output_stage_shifts); + _offset_contribution_output_stage_kernel->configure(compile_context, &_mm_result_s32, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? 
nullptr : &_vector_sum_row, c, output, + a->info()->dimension(0), + _a_offset, _b_offset, gemmlowp_output_stage, &_gemm_output_stage_multipliers, &_gemm_output_stage_shifts); _mm_result_s32.allocator()->allocate(); } } @@ -270,7 +280,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con if(_is_gemm_reshaped) { // Configure and tune matrix multiply kernel - _mm_reshaped_only_rhs_kernel.configure(compile_context, _matrix_a, matrix_b, output, gemm_kernel_info); + _mm_reshaped_only_rhs_kernel->configure(compile_context, _matrix_a, matrix_b, output, gemm_kernel_info); } else { @@ -278,12 +288,12 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con std::tie(lhs_info, rhs_info) = CLGEMMNativeKernelConfigurationFactory::create(gpu_target)->configure(m, n, k, batch_size, DataType::QASYMM8); // Configure matrix multiply kernel - _mm_native_kernel.configure(compile_context, _matrix_a, matrix_b, output, lhs_info, rhs_info, GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d)); + _mm_native_kernel->configure(compile_context, _matrix_a, matrix_b, output, lhs_info, rhs_info, GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d)); } // Configure offset contribution kernel - _offset_contribution_kernel.configure(compile_context, output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, a->info()->dimension(0), _a_offset, - _b_offset); + _offset_contribution_kernel->configure(compile_context, output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, a->info()->dimension(0), _a_offset, + _b_offset); } // Allocate tensors @@ -489,40 +499,40 @@ void CLGEMMLowpMatrixMultiplyCore::run() if(!_reshape_b_only_on_first_run) { // Run reshape matrix B - CLScheduler::get().enqueue(_mtx_b_reshape_kernel, false); + CLScheduler::get().enqueue(*_mtx_b_reshape_kernel, false); } } // Run matrix B reduction kernel only if _a_offset is not equal to 0 if(_a_offset != 0 && !_reshape_b_only_on_first_run) { - CLScheduler::get().enqueue(_mtx_b_reduction_kernel, false); + CLScheduler::get().enqueue(*_mtx_b_reduction_kernel, false); } // Run matrix A reduction kernel only if _b_offset is not equal to 0 if(_b_offset != 0) { - CLScheduler::get().enqueue(_mtx_a_reduction_kernel, false); + CLScheduler::get().enqueue(*_mtx_a_reduction_kernel, false); } // Run matrix multiply if(_is_gemm_reshaped) { - CLScheduler::get().enqueue(_mm_reshaped_only_rhs_kernel, false); + CLScheduler::get().enqueue(*_mm_reshaped_only_rhs_kernel, false); } else { - CLScheduler::get().enqueue(_mm_native_kernel, false); + CLScheduler::get().enqueue(*_mm_native_kernel, false); } if(_run_output_stage) { // Run offset contribution/output stage kernel - CLScheduler::get().enqueue(_offset_contribution_output_stage_kernel, true); + CLScheduler::get().enqueue(*_offset_contribution_output_stage_kernel, true); } if(_run_offset_contribution) { // Run offset contribution kernel - CLScheduler::get().enqueue(_offset_contribution_kernel, true); + CLScheduler::get().enqueue(*_offset_contribution_kernel, true); } } @@ -533,7 +543,7 @@ void CLGEMMLowpMatrixMultiplyCore::prepare() if(_convert_to_qasymm8) { _qasymm8_weights.allocator()->allocate(); - CLScheduler::get().enqueue(_weights_to_qasymm8, false); + CLScheduler::get().enqueue(*_weights_to_qasymm8, false); } if(_is_gemm_reshaped && _reshape_b_only_on_first_run) @@ -542,7 +552,7 @@ void CLGEMMLowpMatrixMultiplyCore::prepare() // Run 
reshape kernel and mark original weights tensor as unused _tmp_b.allocator()->allocate(); - CLScheduler::get().enqueue(_mtx_b_reshape_kernel, false); + CLScheduler::get().enqueue(*_mtx_b_reshape_kernel, false); _original_b->mark_as_unused(); } @@ -550,7 +560,7 @@ void CLGEMMLowpMatrixMultiplyCore::prepare() if(_a_offset != 0 && _reshape_b_only_on_first_run) { _vector_sum_col.allocator()->allocate(); - CLScheduler::get().enqueue(_mtx_b_reduction_kernel, false); + CLScheduler::get().enqueue(*_mtx_b_reduction_kernel, false); } CLScheduler::get().queue().finish(); diff --git a/src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp b/src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp index 28f397fd8b..f9c5247d2d 100644 --- a/src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp +++ b/src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp @@ -24,11 +24,14 @@ #include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h" +#include "arm_compute/core/Types.h" +#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h" #include "support/MemorySupport.h" +#include + namespace arm_compute { void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, diff --git a/src/runtime/CL/functions/CLGather.cpp b/src/runtime/CL/functions/CLGather.cpp index d9b6679ebf..de6296f6a3 100644 --- a/src/runtime/CL/functions/CLGather.cpp +++ b/src/runtime/CL/functions/CLGather.cpp @@ -24,7 +24,7 @@ #include "arm_compute/runtime/CL/functions/CLGather.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLGatherKernel.h" +#include "src/core/CL/kernels/CLGatherKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/CL/functions/CLGaussian3x3.cpp b/src/runtime/CL/functions/CLGaussian3x3.cpp index c62e200315..97db9ba06d 100644 --- a/src/runtime/CL/functions/CLGaussian3x3.cpp +++ b/src/runtime/CL/functions/CLGaussian3x3.cpp @@ -23,8 +23,9 @@ */ #include "arm_compute/runtime/CL/functions/CLGaussian3x3.h" -#include "arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLGaussian3x3Kernel.h" #include "support/MemorySupport.h" #include @@ -41,5 +42,5 @@ void CLGaussian3x3::configure(const CLCompileContext &compile_context, ICLTensor auto k = arm_compute::support::cpp14::make_unique(); k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); } diff --git a/src/runtime/CL/functions/CLGaussian5x5.cpp b/src/runtime/CL/functions/CLGaussian5x5.cpp index 1fe2fddfb6..f7470d4ecf 100644 --- a/src/runtime/CL/functions/CLGaussian5x5.cpp +++ b/src/runtime/CL/functions/CLGaussian5x5.cpp @@ -24,22 +24,30 @@ #include 
"arm_compute/runtime/CL/functions/CLGaussian5x5.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/ITensorAllocator.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLGaussian5x5Kernel.h" +#include "support/MemorySupport.h" #include using namespace arm_compute; CLGaussian5x5::CLGaussian5x5(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _kernel_hor(), _kernel_vert(), _border_handler(), _tmp() + : _memory_group(std::move(memory_manager)), + _kernel_hor(support::cpp14::make_unique()), + _kernel_vert(support::cpp14::make_unique()), + _border_handler(support::cpp14::make_unique()), + _tmp() { } +CLGaussian5x5::~CLGaussian5x5() = default; + void CLGaussian5x5::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value) { configure(CLKernelLibrary::get().get_compile_context(), input, output, border_mode, constant_border_value); @@ -55,9 +63,9 @@ void CLGaussian5x5::configure(const CLCompileContext &compile_context, ICLTensor _memory_group.manage(&_tmp); // Configure kernels - _kernel_hor.configure(compile_context, input, &_tmp, border_mode == BorderMode::UNDEFINED); - _kernel_vert.configure(compile_context, &_tmp, output, border_mode == BorderMode::UNDEFINED); - _border_handler.configure(compile_context, input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value)); + _kernel_hor->configure(compile_context, input, &_tmp, border_mode == BorderMode::UNDEFINED); + _kernel_vert->configure(compile_context, &_tmp, output, border_mode == BorderMode::UNDEFINED); + _border_handler->configure(compile_context, input, _kernel_hor->border_size(), border_mode, PixelValue(constant_border_value)); // Allocate intermediate buffers _tmp.allocator()->allocate(); @@ -65,10 +73,10 @@ void CLGaussian5x5::configure(const CLCompileContext &compile_context, ICLTensor void CLGaussian5x5::run() { - CLScheduler::get().enqueue(_border_handler, false); + CLScheduler::get().enqueue(*_border_handler, false); MemoryGroupResourceScope scope_mg(_memory_group); - CLScheduler::get().enqueue(_kernel_hor, false); - CLScheduler::get().enqueue(_kernel_vert); + CLScheduler::get().enqueue(*_kernel_hor, false); + CLScheduler::get().enqueue(*_kernel_vert); } diff --git a/src/runtime/CL/functions/CLGaussianPyramid.cpp b/src/runtime/CL/functions/CLGaussianPyramid.cpp index 297d535ba5..66b85352c1 100644 --- a/src/runtime/CL/functions/CLGaussianPyramid.cpp +++ b/src/runtime/CL/functions/CLGaussianPyramid.cpp @@ -24,19 +24,21 @@ #include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h" -#include "arm_compute/core/CL/kernels/CLScaleKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" - #include "arm_compute/runtime/CL/CLPyramid.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "arm_compute/runtime/CL/functions/CLGaussian5x5.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include 
"src/core/CL/kernels/CLGaussian5x5Kernel.h" +#include "src/core/CL/kernels/CLGaussianPyramidKernel.h" +#include "src/core/CL/kernels/CLScaleKernel.h" +#include "support/MemorySupport.h" #include @@ -47,6 +49,8 @@ CLGaussianPyramid::CLGaussianPyramid() { } +CLGaussianPyramid::~CLGaussianPyramid() = default; + CLGaussianPyramidHalf::CLGaussianPyramidHalf() // NOLINT : _horizontal_border_handler(), _vertical_border_handler(), @@ -55,6 +59,8 @@ CLGaussianPyramidHalf::CLGaussianPyramidHalf() // NOLINT { } +CLGaussianPyramidHalf::~CLGaussianPyramidHalf() = default; + void CLGaussianPyramidHalf::configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) { configure(CLKernelLibrary::get().get_compile_context(), input, pyramid, border_mode, constant_border_value); @@ -80,10 +86,10 @@ void CLGaussianPyramidHalf::configure(const CLCompileContext &compile_context, I if(num_levels > 1) { - _horizontal_border_handler.resize(num_levels - 1); - _vertical_border_handler.resize(num_levels - 1); - _horizontal_reduction.resize(num_levels - 1); - _vertical_reduction.resize(num_levels - 1); + _horizontal_border_handler.reserve(num_levels - 1); + _vertical_border_handler.reserve(num_levels - 1); + _horizontal_reduction.reserve(num_levels - 1); + _vertical_reduction.reserve(num_levels - 1); // Apply half scale to the X dimension of the tensor shape TensorShape tensor_shape = pyramid->info()->tensor_shape(); @@ -95,16 +101,20 @@ void CLGaussianPyramidHalf::configure(const CLCompileContext &compile_context, I for(size_t i = 0; i < num_levels - 1; ++i) { /* Configure horizontal kernel */ - _horizontal_reduction[i].configure(compile_context, _pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i)); + _horizontal_reduction.emplace_back(support::cpp14::make_unique()); + _horizontal_reduction.back()->configure(compile_context, _pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i)); /* Configure vertical kernel */ - _vertical_reduction[i].configure(compile_context, _tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1)); + _vertical_reduction.emplace_back(support::cpp14::make_unique()); + _vertical_reduction.back()->configure(compile_context, _tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1)); /* Configure border */ - _horizontal_border_handler[i].configure(compile_context, _pyramid->get_pyramid_level(i), _horizontal_reduction[i].border_size(), border_mode, PixelValue(constant_border_value)); + _horizontal_border_handler.emplace_back(support::cpp14::make_unique()); + _horizontal_border_handler.back()->configure(compile_context, _pyramid->get_pyramid_level(i), _horizontal_reduction.back()->border_size(), border_mode, PixelValue(constant_border_value)); /* Configure border */ - _vertical_border_handler[i].configure(compile_context, _tmp.get_pyramid_level(i), _vertical_reduction[i].border_size(), border_mode, PixelValue(pixel_value_u16)); + _vertical_border_handler.emplace_back(support::cpp14::make_unique()); + _vertical_border_handler.back()->configure(compile_context, _tmp.get_pyramid_level(i), _vertical_reduction.back()->border_size(), border_mode, PixelValue(pixel_value_u16)); } _tmp.allocate(); } @@ -127,10 +137,10 @@ void CLGaussianPyramidHalf::run() for(unsigned int i = 0; i < num_levels - 1; ++i) { - CLScheduler::get().enqueue(_horizontal_border_handler[i], false); - CLScheduler::get().enqueue(_horizontal_reduction[i], false); - CLScheduler::get().enqueue(_vertical_border_handler[i], false); - CLScheduler::get().enqueue(_vertical_reduction[i], 
false); + CLScheduler::get().enqueue(*_horizontal_border_handler[i], false); + CLScheduler::get().enqueue(*_horizontal_reduction[i], false); + CLScheduler::get().enqueue(*_vertical_border_handler[i], false); + CLScheduler::get().enqueue(*_vertical_reduction[i], false); } } @@ -163,7 +173,7 @@ void CLGaussianPyramidOrb::configure(const CLCompileContext &compile_context, IC if(num_levels > 1) { _gauss5x5.resize(num_levels - 1); - _scale_nearest.resize(num_levels - 1); + _scale_nearest.reserve(num_levels - 1); PyramidInfo pyramid_info(num_levels - 1, SCALE_PYRAMID_ORB, pyramid->info()->tensor_shape(), Format::U8); @@ -175,7 +185,8 @@ void CLGaussianPyramidOrb::configure(const CLCompileContext &compile_context, IC _gauss5x5[i].configure(compile_context, _pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i), border_mode, constant_border_value); /* Configure scale image kernel */ - _scale_nearest[i].configure(compile_context, _tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1), ScaleKernelInfo{ InterpolationPolicy::NEAREST_NEIGHBOR, border_mode, PixelValue(), SamplingPolicy::CENTER }); + _scale_nearest.emplace_back(support::cpp14::make_unique()); + _scale_nearest.back()->configure(compile_context, _tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1), ScaleKernelInfo{ InterpolationPolicy::NEAREST_NEIGHBOR, border_mode, PixelValue(), SamplingPolicy::CENTER }); } _tmp.allocate(); @@ -199,6 +210,6 @@ void CLGaussianPyramidOrb::run() for(unsigned int i = 0; i < num_levels - 1; ++i) { _gauss5x5[i].run(); - CLScheduler::get().enqueue(_scale_nearest[i]); + CLScheduler::get().enqueue(*_scale_nearest[i]); } } diff --git a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp index 5291de074a..87bf39030a 100644 --- a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp +++ b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp @@ -25,22 +25,29 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Types.h" +#include "src/core/CL/kernels/CLBoundingBoxTransformKernel.h" +#include "src/core/CL/kernels/CLDequantizationLayerKernel.h" +#include "src/core/CL/kernels/CLGenerateProposalsLayerKernel.h" +#include "src/core/CL/kernels/CLPadLayerKernel.h" +#include "src/core/CL/kernels/CLPermuteKernel.h" +#include "src/core/CL/kernels/CLQuantizationLayerKernel.h" #include "src/core/helpers/AutoConfiguration.h" +#include "support/MemorySupport.h" namespace arm_compute { CLGenerateProposalsLayer::CLGenerateProposalsLayer(std::shared_ptr memory_manager) : _memory_group(memory_manager), - _permute_deltas_kernel(), + _permute_deltas_kernel(support::cpp14::make_unique()), _flatten_deltas(), - _permute_scores_kernel(), + _permute_scores_kernel(support::cpp14::make_unique()), _flatten_scores(), - _compute_anchors_kernel(), - _bounding_box_kernel(), - _pad_kernel(), - _dequantize_anchors(), - _dequantize_deltas(), - _quantize_all_proposals(), + _compute_anchors_kernel(support::cpp14::make_unique()), + _bounding_box_kernel(support::cpp14::make_unique()), + _pad_kernel(support::cpp14::make_unique()), + _dequantize_anchors(support::cpp14::make_unique()), + _dequantize_deltas(support::cpp14::make_unique()), + _quantize_all_proposals(support::cpp14::make_unique()), _cpp_nms(memory_manager), _is_nhwc(false), _is_qasymm8(false), @@ -62,6 +69,8 @@ CLGenerateProposalsLayer::CLGenerateProposalsLayer(std::shared_ptrconfigure(compile_context, anchors, &_all_anchors, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale())); const 
TensorShape flatten_shape_deltas(values_per_roi, total_num_anchors); _deltas_flattened.allocator()->init(TensorInfo(flatten_shape_deltas, 1, scores_data_type, deltas->info()->quantization_info())); @@ -102,7 +111,7 @@ void CLGenerateProposalsLayer::configure(const CLCompileContext &compile_context if(!_is_nhwc) { _memory_group.manage(&_deltas_permuted); - _permute_deltas_kernel.configure(compile_context, deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 }); + _permute_deltas_kernel->configure(compile_context, deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 }); _flatten_deltas.configure(compile_context, &_deltas_permuted, &_deltas_flattened); _deltas_permuted.allocator()->allocate(); } @@ -119,7 +128,7 @@ void CLGenerateProposalsLayer::configure(const CLCompileContext &compile_context if(!_is_nhwc) { _memory_group.manage(&_scores_permuted); - _permute_scores_kernel.configure(compile_context, scores, &_scores_permuted, PermutationVector{ 2, 0, 1 }); + _permute_scores_kernel->configure(compile_context, scores, &_scores_permuted, PermutationVector{ 2, 0, 1 }); _flatten_scores.configure(compile_context, &_scores_permuted, &_scores_flattened); _scores_permuted.allocator()->allocate(); } @@ -137,18 +146,18 @@ void CLGenerateProposalsLayer::configure(const CLCompileContext &compile_context _memory_group.manage(&_all_anchors_f32); _memory_group.manage(&_deltas_flattened_f32); // Dequantize anchors to float - _dequantize_anchors.configure(compile_context, &_all_anchors, &_all_anchors_f32); + _dequantize_anchors->configure(compile_context, &_all_anchors, &_all_anchors_f32); _all_anchors.allocator()->allocate(); anchors_to_use = &_all_anchors_f32; // Dequantize deltas to float - _dequantize_deltas.configure(compile_context, &_deltas_flattened, &_deltas_flattened_f32); + _dequantize_deltas->configure(compile_context, &_deltas_flattened, &_deltas_flattened_f32); _deltas_flattened.allocator()->allocate(); deltas_to_use = &_deltas_flattened_f32; } // Bounding box transform _memory_group.manage(&_all_proposals); BoundingBoxTransformInfo bbox_info(info.im_width(), info.im_height(), 1.f); - _bounding_box_kernel.configure(compile_context, anchors_to_use, &_all_proposals, deltas_to_use, bbox_info); + _bounding_box_kernel->configure(compile_context, anchors_to_use, &_all_proposals, deltas_to_use, bbox_info); deltas_to_use->allocator()->allocate(); anchors_to_use->allocator()->allocate(); @@ -158,7 +167,7 @@ void CLGenerateProposalsLayer::configure(const CLCompileContext &compile_context _memory_group.manage(&_all_proposals_quantized); // Requantize all_proposals to QASYMM16 with 0.125 scale and 0 offset _all_proposals_quantized.allocator()->init(TensorInfo(_all_proposals.info()->tensor_shape(), 1, DataType::QASYMM16, QuantizationInfo(0.125f, 0))); - _quantize_all_proposals.configure(compile_context, &_all_proposals, &_all_proposals_quantized); + _quantize_all_proposals->configure(compile_context, &_all_proposals, &_all_proposals_quantized); _all_proposals.allocator()->allocate(); _all_proposals_to_use = &_all_proposals_quantized; } @@ -193,7 +202,7 @@ void CLGenerateProposalsLayer::configure(const CLCompileContext &compile_context _scores_flattened.allocator()->allocate(); // Add the first column that represents the batch id. 
This will be all zeros, as we don't support multiple images - _pad_kernel.configure(compile_context, &_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } }); + _pad_kernel->configure(compile_context, &_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } }); _proposals_4_roi_values.allocator()->allocate(); } @@ -343,34 +352,34 @@ void CLGenerateProposalsLayer::run() MemoryGroupResourceScope scope_mg(_memory_group); // Compute all the anchors - CLScheduler::get().enqueue(_compute_anchors_kernel, false); + CLScheduler::get().enqueue(*_compute_anchors_kernel, false); // Transpose and reshape the inputs if(!_is_nhwc) { - CLScheduler::get().enqueue(_permute_deltas_kernel, false); - CLScheduler::get().enqueue(_permute_scores_kernel, false); + CLScheduler::get().enqueue(*_permute_deltas_kernel, false); + CLScheduler::get().enqueue(*_permute_scores_kernel, false); } _flatten_deltas.run(); _flatten_scores.run(); if(_is_qasymm8) { - CLScheduler::get().enqueue(_dequantize_anchors, false); - CLScheduler::get().enqueue(_dequantize_deltas, false); + CLScheduler::get().enqueue(*_dequantize_anchors, false); + CLScheduler::get().enqueue(*_dequantize_deltas, false); } // Build the boxes - CLScheduler::get().enqueue(_bounding_box_kernel, false); + CLScheduler::get().enqueue(*_bounding_box_kernel, false); if(_is_qasymm8) { - CLScheduler::get().enqueue(_quantize_all_proposals, false); + CLScheduler::get().enqueue(*_quantize_all_proposals, false); } // Non maxima suppression run_cpp_nms_kernel(); // Add dummy batch indexes - CLScheduler::get().enqueue(_pad_kernel, true); + CLScheduler::get().enqueue(*_pad_kernel, true); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLHOGDescriptor.cpp b/src/runtime/CL/functions/CLHOGDescriptor.cpp index 21fa6690ea..80026532ab 100644 --- a/src/runtime/CL/functions/CLHOGDescriptor.cpp +++ b/src/runtime/CL/functions/CLHOGDescriptor.cpp @@ -28,14 +28,26 @@ #include "arm_compute/core/Size2D.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLHOGDescriptorKernel.h" +#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h" +#include "support/MemorySupport.h" using namespace arm_compute; CLHOGDescriptor::CLHOGDescriptor(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _gradient(), _orient_bin(), _block_norm(), _mag(), _phase(), _hog_space() + : _memory_group(std::move(memory_manager)), + _gradient(), + _orient_bin(support::cpp14::make_unique()), + _block_norm(support::cpp14::make_unique()), + _mag(), + _phase(), + _hog_space() { } +CLHOGDescriptor::~CLHOGDescriptor() = default; + void CLHOGDescriptor::configure(ICLTensor *input, ICLTensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value) { configure(CLKernelLibrary::get().get_compile_context(), input, output, hog, border_mode, constant_border_value); @@ -87,10 +99,10 @@ void CLHOGDescriptor::configure(const CLCompileContext &compile_context, ICLTens _memory_group.manage(&_hog_space); // Initialise orientation binning kernel - _orient_bin.configure(compile_context, &_mag, &_phase, &_hog_space, hog->info()); + _orient_bin->configure(compile_context, &_mag, &_phase, &_hog_space, hog->info()); // Initialize HOG norm kernel - _block_norm.configure(compile_context, &_hog_space, output, hog->info()); + _block_norm->configure(compile_context, &_hog_space, output, hog->info()); // Allocate intermediate tensors 
_mag.allocator()->allocate();
@@ -106,8 +118,8 @@ void CLHOGDescriptor::run()
_gradient.run();
// Run orientation binning
- CLScheduler::get().enqueue(_orient_bin, false);
+ CLScheduler::get().enqueue(*_orient_bin, false);
// Run block normalization
- CLScheduler::get().enqueue(_block_norm);
+ CLScheduler::get().enqueue(*_block_norm);
}
\ No newline at end of file
diff --git a/src/runtime/CL/functions/CLHOGDetector.cpp b/src/runtime/CL/functions/CLHOGDetector.cpp
index 9188f654dc..07ae8151c0 100644
--- a/src/runtime/CL/functions/CLHOGDetector.cpp
+++ b/src/runtime/CL/functions/CLHOGDetector.cpp
@@ -23,19 +23,22 @@
*/
#include "arm_compute/runtime/CL/functions/CLHOGDetector.h"
-#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLHOGDetectorKernel.h"
+#include "support/MemorySupport.h"
#include
using namespace arm_compute;
CLHOGDetector::CLHOGDetector()
- : _hog_detector_kernel(), _detection_windows(nullptr), _num_detection_windows()
+ : _hog_detector_kernel(support::cpp14::make_unique<CLHOGDetectorKernel>()), _detection_windows(nullptr), _num_detection_windows()
{
}
+CLHOGDetector::~CLHOGDetector() = default;
+
void CLHOGDetector::configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold, size_t idx_class)
{
configure(CLKernelLibrary::get().get_compile_context(), input, hog, detection_windows, detection_window_stride, threshold, idx_class);
@@ -50,7 +53,7 @@ void CLHOGDetector::configure(const CLCompileContext &compile_context, const ICL
_num_detection_windows = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(unsigned int));
// Configure HOGDetectorKernel
- _hog_detector_kernel.configure(compile_context, input, hog, detection_windows, &_num_detection_windows, detection_window_stride, threshold, idx_class);
+ _hog_detector_kernel->configure(compile_context, input, hog, detection_windows, &_num_detection_windows, detection_window_stride, threshold, idx_class);
}
void CLHOGDetector::run()
@@ -62,7 +65,7 @@ void CLHOGDetector::run()
q.enqueueWriteBuffer(_num_detection_windows, CL_FALSE, 0, sizeof(unsigned int), &init_num_detection_windows);
// Run CLHOGDetectorKernel
- CLScheduler::get().enqueue(_hog_detector_kernel);
+ CLScheduler::get().enqueue(*_hog_detector_kernel);
// Read number of detections
unsigned int num_detection_windows = 0;
diff --git a/src/runtime/CL/functions/CLHOGGradient.cpp b/src/runtime/CL/functions/CLHOGGradient.cpp
index 934d1f6351..5f3b9cf529 100644
--- a/src/runtime/CL/functions/CLHOGGradient.cpp
+++ b/src/runtime/CL/functions/CLHOGGradient.cpp
@@ -26,11 +26,18 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
CLHOGGradient::CLHOGGradient(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _derivative(), _mag_phase(), _gx(), _gy()
+ : _memory_group(std::move(memory_manager)),
+ _derivative(),
+ _mag_phase(support::cpp14::make_unique<CLMagnitudePhaseKernel>()),
+ _gx(),
+ _gy()
{
}
@@ -63,11 +70,11 @@ void CLHOGGradient::configure(const CLCompileContext &compile_context, ICLTensor
// Initialise magnitude/phase kernel
if(PhaseType::UNSIGNED == phase_type)
{
-
_mag_phase.configure(compile_context, &_gx, &_gy, output_magnitude, output_phase, MagnitudeType::L2NORM, PhaseType::UNSIGNED); + _mag_phase->configure(compile_context, &_gx, &_gy, output_magnitude, output_phase, MagnitudeType::L2NORM, PhaseType::UNSIGNED); } else { - _mag_phase.configure(compile_context, &_gx, &_gy, output_magnitude, output_phase, MagnitudeType::L2NORM, PhaseType::SIGNED); + _mag_phase->configure(compile_context, &_gx, &_gy, output_magnitude, output_phase, MagnitudeType::L2NORM, PhaseType::SIGNED); } // Allocate intermediate tensors @@ -83,5 +90,5 @@ void CLHOGGradient::run() _derivative.run(); // Run magnitude/phase kernel - CLScheduler::get().enqueue(_mag_phase); + CLScheduler::get().enqueue(*_mag_phase); } \ No newline at end of file diff --git a/src/runtime/CL/functions/CLHOGMultiDetection.cpp b/src/runtime/CL/functions/CLHOGMultiDetection.cpp index 51db43cd71..dfc90537cf 100644 --- a/src/runtime/CL/functions/CLHOGMultiDetection.cpp +++ b/src/runtime/CL/functions/CLHOGMultiDetection.cpp @@ -30,6 +30,11 @@ #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/Scheduler.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLHOGDescriptorKernel.h" +#include "src/core/CL/kernels/CLHOGDetectorKernel.h" +#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h" +#include "support/MemorySupport.h" using namespace arm_compute; @@ -52,6 +57,8 @@ CLHOGMultiDetection::CLHOGMultiDetection(std::shared_ptr memory_ { } +CLHOGMultiDetection::~CLHOGMultiDetection() = default; + void CLHOGMultiDetection::configure(ICLTensor *input, const ICLMultiHOG *multi_hog, ICLDetectionWindowArray *detection_windows, ICLSize2DArray *detection_window_strides, BorderMode border_mode, uint8_t constant_border_value, float threshold, bool non_maxima_suppression, float min_distance) { @@ -135,8 +142,8 @@ void CLHOGMultiDetection::configure(const CLCompileContext &compile_context, ICL _num_block_norm_kernel = input_block_norm.size(); // Number of CLHOGBlockNormalizationKernel kernels to compute _num_hog_detect_kernel = input_hog_detect.size(); // Number of CLHOGDetector functions to compute - _orient_bin_kernel.resize(_num_orient_bin_kernel); - _block_norm_kernel.resize(_num_block_norm_kernel); + _orient_bin_kernel.reserve(_num_orient_bin_kernel); + _block_norm_kernel.reserve(_num_block_norm_kernel); _hog_detect_kernel.resize(_num_hog_detect_kernel); _hog_space.resize(_num_orient_bin_kernel); _hog_norm_space.resize(_num_block_norm_kernel); @@ -181,7 +188,8 @@ void CLHOGMultiDetection::configure(const CLCompileContext &compile_context, ICL _memory_group.manage(&_hog_space[i]); // Initialise orientation binning kernel - _orient_bin_kernel[i].configure(compile_context, &_mag, &_phase, &_hog_space[i], multi_hog->model(idx_multi_hog)->info()); + _orient_bin_kernel.emplace_back(support::cpp14::make_unique()); + _orient_bin_kernel.back()->configure(compile_context, &_mag, &_phase, &_hog_space[i], multi_hog->model(idx_multi_hog)->info()); } // Allocate intermediate tensors @@ -202,7 +210,8 @@ void CLHOGMultiDetection::configure(const CLCompileContext &compile_context, ICL _memory_group.manage(&_hog_norm_space[i]); // Initialize block normalization kernel - _block_norm_kernel[i].configure(compile_context, &_hog_space[idx_orient_bin], &_hog_norm_space[i], multi_hog->model(idx_multi_hog)->info()); + _block_norm_kernel.emplace_back(support::cpp14::make_unique()); + _block_norm_kernel.back()->configure(compile_context, 
&_hog_space[idx_orient_bin], &_hog_norm_space[i], multi_hog->model(idx_multi_hog)->info()); } // Allocate intermediate tensors @@ -248,13 +257,13 @@ void CLHOGMultiDetection::run() // Run orientation binning kernel for(size_t i = 0; i < _num_orient_bin_kernel; ++i) { - CLScheduler::get().enqueue(_orient_bin_kernel[i], false); + CLScheduler::get().enqueue(*_orient_bin_kernel[i], false); } // Run block normalization kernel for(size_t i = 0; i < _num_block_norm_kernel; ++i) { - CLScheduler::get().enqueue(_block_norm_kernel[i], false); + CLScheduler::get().enqueue(*_block_norm_kernel[i], false); } // Run HOG detector kernel diff --git a/src/runtime/CL/functions/CLHarrisCorners.cpp b/src/runtime/CL/functions/CLHarrisCorners.cpp index 45b93a5be0..9d8ebceb30 100644 --- a/src/runtime/CL/functions/CLHarrisCorners.cpp +++ b/src/runtime/CL/functions/CLHarrisCorners.cpp @@ -24,8 +24,6 @@ #include "arm_compute/runtime/CL/functions/CLHarrisCorners.h" #include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" -#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" @@ -35,6 +33,10 @@ #include "arm_compute/runtime/CL/functions/CLSobel7x7.h" #include "arm_compute/runtime/ITensorAllocator.h" #include "arm_compute/runtime/Scheduler.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLHarrisCornersKernel.h" +#include "src/core/CL/kernels/CLSobel5x5Kernel.h" +#include "src/core/CL/kernels/CLSobel7x7Kernel.h" #include "support/MemorySupport.h" #include @@ -45,12 +47,12 @@ using namespace arm_compute; CLHarrisCorners::CLHarrisCorners(std::shared_ptr memory_manager) // NOLINT : _memory_group(std::move(memory_manager)), _sobel(nullptr), - _harris_score(), + _harris_score(support::cpp14::make_unique()), _non_max_suppr(), _candidates(), _sort_euclidean(), - _border_gx(), - _border_gy(), + _border_gx(support::cpp14::make_unique()), + _border_gy(support::cpp14::make_unique()), _gx(), _gy(), _score(), @@ -61,6 +63,8 @@ CLHarrisCorners::CLHarrisCorners(std::shared_ptr memory_manager) { } +CLHarrisCorners::~CLHarrisCorners() = default; + void CLHarrisCorners::configure(ICLImage *input, float threshold, float min_dist, float sensitivity, int32_t gradient_size, int32_t block_size, ICLKeyPointArray *corners, BorderMode border_mode, uint8_t constant_border_value, bool use_fp16) @@ -133,11 +137,11 @@ void CLHarrisCorners::configure(const CLCompileContext &compile_context, ICLImag _memory_group.manage(&_score); // Set/init Harris Score kernel accordingly with block_size - _harris_score.configure(compile_context, &_gx, &_gy, &_score, block_size, pow4_normalization_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); + _harris_score->configure(compile_context, &_gx, &_gy, &_score, block_size, pow4_normalization_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); // Configure border filling using harris score kernel's block size - _border_gx.configure(compile_context, &_gx, _harris_score.border_size(), border_mode, PixelValue(constant_border_value)); - _border_gy.configure(compile_context, &_gy, _harris_score.border_size(), border_mode, PixelValue(constant_border_value)); + _border_gx->configure(compile_context, &_gx, _harris_score->border_size(), border_mode, PixelValue(constant_border_value)); + _border_gy->configure(compile_context, &_gy, _harris_score->border_size(), border_mode, 
PixelValue(constant_border_value)); // Allocate intermediate buffers _gx.allocator()->allocate(); @@ -175,11 +179,11 @@ void CLHarrisCorners::run() _sobel->run(); // Fill border before harris score kernel - CLScheduler::get().enqueue(_border_gx, false); - CLScheduler::get().enqueue(_border_gy, false); + CLScheduler::get().enqueue(*_border_gx, false); + CLScheduler::get().enqueue(*_border_gy, false); // Run harris score kernel - CLScheduler::get().enqueue(_harris_score, false); + CLScheduler::get().enqueue(*_harris_score, false); // Run non-maxima suppression _non_max_suppr.run(); diff --git a/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp b/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp index 4a60ee9d08..bd680f448d 100644 --- a/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp +++ b/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp @@ -23,9 +23,9 @@ */ #include "arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h" -#include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h" #include "arm_compute/core/Types.h" - +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/CL/functions/CLIntegralImage.cpp b/src/runtime/CL/functions/CLIntegralImage.cpp index 8561494242..41e47e77c7 100644 --- a/src/runtime/CL/functions/CLIntegralImage.cpp +++ b/src/runtime/CL/functions/CLIntegralImage.cpp @@ -23,16 +23,20 @@ */ #include "arm_compute/runtime/CL/functions/CLIntegralImage.h" -#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLIntegralImageKernel.h" +#include "support/MemorySupport.h" using namespace arm_compute; CLIntegralImage::CLIntegralImage() - : _integral_hor(), _integral_vert() + : _integral_hor(support::cpp14::make_unique<CLIntegralImageHorKernel>()), + _integral_vert(support::cpp14::make_unique<CLIntegralImageVertKernel>()) { } +CLIntegralImage::~CLIntegralImage() = default; + void CLIntegralImage::configure(const ICLTensor *input, ICLTensor *output) { configure(CLKernelLibrary::get().get_compile_context(), input, output); @@ -40,12 +44,12 @@ void CLIntegralImage::configure(const ICLTensor *input, ICLTensor *output) void CLIntegralImage::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) { - _integral_hor.configure(compile_context, input, output); - _integral_vert.configure(compile_context, output); + _integral_hor->configure(compile_context, input, output); + _integral_vert->configure(compile_context, output); } void CLIntegralImage::run() { - CLScheduler::get().enqueue(_integral_hor, false); - CLScheduler::get().enqueue(_integral_vert); + CLScheduler::get().enqueue(*_integral_hor, false); + CLScheduler::get().enqueue(*_integral_vert); } diff --git a/src/runtime/CL/functions/CLL2NormalizeLayer.cpp b/src/runtime/CL/functions/CLL2NormalizeLayer.cpp index 66191d1799..64aac269cd 100644 --- a/src/runtime/CL/functions/CLL2NormalizeLayer.cpp +++ b/src/runtime/CL/functions/CLL2NormalizeLayer.cpp @@ -24,12 +24,15 @@ #include "arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include 
"src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLL2NormalizeLayerKernel.h" +#include "src/core/CL/kernels/CLReductionOperationKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { @@ -39,10 +42,15 @@ constexpr int max_input_tensor_dim = 3; } // namespace CLL2NormalizeLayer::CLL2NormalizeLayer(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _reduce_func(), _normalize_kernel(), _sumsq() + : _memory_group(std::move(memory_manager)), + _reduce_func(), + _normalize_kernel(support::cpp14::make_unique()), + _sumsq() { } +CLL2NormalizeLayer::~CLL2NormalizeLayer() = default; + void CLL2NormalizeLayer::configure(ICLTensor *input, ICLTensor *output, int axis, float epsilon) { configure(CLKernelLibrary::get().get_compile_context(), input, output, axis, epsilon); @@ -59,7 +67,7 @@ void CLL2NormalizeLayer::configure(const CLCompileContext &compile_context, ICLT // Configure kernels const uint32_t actual_axis = wrap_around(axis, max_input_tensor_dim); _reduce_func.configure(compile_context, input, &_sumsq, actual_axis, ReductionOperation::SUM_SQUARE); - _normalize_kernel.configure(compile_context, input, &_sumsq, output, axis, epsilon); + _normalize_kernel->configure(compile_context, input, &_sumsq, output, axis, epsilon); // Allocate intermediate tensor _sumsq.allocator()->allocate(); @@ -91,6 +99,6 @@ void CLL2NormalizeLayer::run() MemoryGroupResourceScope scope_mg(_memory_group); _reduce_func.run(); - CLScheduler::get().enqueue(_normalize_kernel, true); + CLScheduler::get().enqueue(*_normalize_kernel, true); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLLSTMLayer.cpp b/src/runtime/CL/functions/CLLSTMLayer.cpp index 058b6027c2..b095c06535 100644 --- a/src/runtime/CL/functions/CLLSTMLayer.cpp +++ b/src/runtime/CL/functions/CLLSTMLayer.cpp @@ -29,6 +29,22 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLCopyKernel.h" +#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" +#include "src/core/CL/kernels/CLMemsetKernel.h" +#include "src/core/CL/kernels/CLTransposeKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { @@ -37,20 +53,23 @@ using namespace arm_compute::utils::info_helpers; CLLSTMLayer::CLLSTMLayer(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _fully_connected_input_gate(), _accum_input_gate1(), _subtract_input_gate(), _pixelwise_mul_input_gate(), _activation_input_gate(), - _fully_connected_forget_gate(), _accum_forget_gate1(), _pixelwise_mul_forget_gate(), _activation_forget_gate(), _fully_connected_cell_state(), _gemm_cell_state1(), _transpose_cell_state(), - 
_accum_cell_state1(), _accum_cell_state2(), _pixelwise_mul_cell_state1(), _activation_cell_state(), _cell_clip(), _pixelwise_mul_cell_state2(), _fully_connected_output(), - _pixelwise_mul_output_state1(), _accum_output1(), _activation_output(), _activation_output_state(), _pixelwise_mul_output_state2(), _fully_connected_output_state(), _projection_clip(), - _copy_cell_state(), _copy_output(), _concat_scratch_buffer(), _concat_inputs_forget_gate(), _concat_weights_forget_gate(), _concat_weights_input_gate(), _concat_weights_output(), - _ones_memset_kernel(), _mean_std_norm_input_gate(), _pixelwise_mul_input_gate_coeff(), _accum_input_gate_bias(), _mean_std_norm_forget_gate(), _pixelwise_mul_forget_gate_coeff(), - _accum_forget_gate_bias(), _mean_std_norm_cell_gate(), _pixelwise_mul_cell_gate_coeff(), _accum_cell_gate_bias(), _mean_std_norm_output_gate(), _pixelwise_mul_output_gate_coeff(), - _accum_output_gate_bias(), _input_gate_out1(), _input_gate_out2(), _input_gate_out3(), _input_gate_out4(), _forget_gate_out1(), _forget_gate_out2(), _forget_gate_out3(), _forget_gate_out4(), - _forget_gate_out5(), _forget_gate_out6(), _cell_state_out1(), _cell_state_out2(), _cell_state_out3(), _cell_state_out4(), _cell_state_out5(), _output1(), _output2(), _output3(), _output4(), - _cell_state_activation(), _output_state1(), _ones(), _input_layer_norm_out1(), _input_layer_norm_out2(), _forget_layer_norm_out1(), _forget_layer_norm_out2(), _cell_layer_norm_out1(), - _cell_layer_norm_out2(), _output_layer_norm_out1(), _output_layer_norm_out2(), _run_peephole_opt(false), _run_cifg_opt(false), _perform_cell_clipping(false), _has_projection_weights(false), - _perform_projection_clipping(false), _is_prepared(false), _is_layer_norm_lstm(false) + _fully_connected_forget_gate(), _accum_forget_gate1(), _pixelwise_mul_forget_gate(), _activation_forget_gate(), _fully_connected_cell_state(), _gemm_cell_state1(), + _transpose_cell_state(support::cpp14::make_unique()), _accum_cell_state1(), _accum_cell_state2(), _pixelwise_mul_cell_state1(), _activation_cell_state(), _cell_clip(), + _pixelwise_mul_cell_state2(), _fully_connected_output(), _pixelwise_mul_output_state1(), _accum_output1(), _activation_output(), _activation_output_state(), _pixelwise_mul_output_state2(), + _fully_connected_output_state(), _projection_clip(), _copy_cell_state(support::cpp14::make_unique()), _copy_output(support::cpp14::make_unique()), _concat_scratch_buffer(), + _concat_inputs_forget_gate(), _concat_weights_forget_gate(), _concat_weights_input_gate(), _concat_weights_output(), _ones_memset_kernel(support::cpp14::make_unique()), + _mean_std_norm_input_gate(), _pixelwise_mul_input_gate_coeff(), _accum_input_gate_bias(), _mean_std_norm_forget_gate(), _pixelwise_mul_forget_gate_coeff(), _accum_forget_gate_bias(), + _mean_std_norm_cell_gate(), _pixelwise_mul_cell_gate_coeff(), _accum_cell_gate_bias(), _mean_std_norm_output_gate(), _pixelwise_mul_output_gate_coeff(), _accum_output_gate_bias(), _input_gate_out1(), + _input_gate_out2(), _input_gate_out3(), _input_gate_out4(), _forget_gate_out1(), _forget_gate_out2(), _forget_gate_out3(), _forget_gate_out4(), _forget_gate_out5(), _forget_gate_out6(), + _cell_state_out1(), _cell_state_out2(), _cell_state_out3(), _cell_state_out4(), _cell_state_out5(), _output1(), _output2(), _output3(), _output4(), _cell_state_activation(), _output_state1(), _ones(), + _input_layer_norm_out1(), _input_layer_norm_out2(), _forget_layer_norm_out1(), _forget_layer_norm_out2(), _cell_layer_norm_out1(), 
_cell_layer_norm_out2(), _output_layer_norm_out1(), + _output_layer_norm_out2(), _run_peephole_opt(false), _run_cifg_opt(false), _perform_cell_clipping(false), _has_projection_weights(false), _perform_projection_clipping(false), _is_prepared(false), + _is_layer_norm_lstm(false) { } +CLLSTMLayer::~CLLSTMLayer() = default; + void CLLSTMLayer::configure(const ICLTensor *input, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights, @@ -172,7 +191,7 @@ void CLLSTMLayer::configure(const CLCompileContext &compile_context, const ICLTe { _memory_group.manage(&_input_gate_out1); _ones.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type())); - _ones_memset_kernel.configure(compile_context, &_ones, PixelValue(1, _ones.info()->data_type())); + _ones_memset_kernel->configure(compile_context, &_ones, PixelValue(1, _ones.info()->data_type())); _subtract_input_gate.configure(compile_context, &_ones, forget_gate_out, &_input_gate_out1, ConvertPolicy::SATURATE); _ones.allocator()->allocate(); _run_cifg_opt = true; @@ -241,7 +260,7 @@ void CLLSTMLayer::configure(const CLCompileContext &compile_context, const ICLTe _memory_group.manage(&_cell_state_out1); _fully_connected_cell_state.configure(compile_context, input, input_to_cell_weights, (_is_layer_norm_lstm) ? nullptr : cell_bias, &_cell_state_out1); _memory_group.manage(&_cell_state_out2); - _transpose_cell_state.configure(compile_context, recurrent_to_cell_weights, &_cell_state_out2); + _transpose_cell_state->configure(compile_context, recurrent_to_cell_weights, &_cell_state_out2); _memory_group.manage(&_cell_state_out3); _gemm_cell_state1.configure(compile_context, output_state_in, &_cell_state_out2, nullptr, &_cell_state_out3, 1.f, 0.f); _cell_state_out2.allocator()->allocate(); @@ -367,8 +386,8 @@ void CLLSTMLayer::configure(const CLCompileContext &compile_context, const ICLTe } // Copy cell state and output - _copy_cell_state.configure(compile_context, &_cell_state_out1, cell_state_out); - _copy_output.configure(compile_context, output_state_out, output); + _copy_cell_state->configure(compile_context, &_cell_state_out1, cell_state_out); + _copy_output->configure(compile_context, output_state_out, output); // Vector for holding the tensors to store in scratch buffer std::vector scratch_inputs; @@ -642,7 +661,7 @@ void CLLSTMLayer::run() if(_run_cifg_opt) { - CLScheduler::get().enqueue(_ones_memset_kernel); + CLScheduler::get().enqueue(*_ones_memset_kernel); _subtract_input_gate.run(); } else @@ -665,7 +684,7 @@ void CLLSTMLayer::run() } _fully_connected_cell_state.run(); - CLScheduler::get().enqueue(_transpose_cell_state); + CLScheduler::get().enqueue(*_transpose_cell_state); _gemm_cell_state1.run(); _accum_cell_state1.run(); if(_is_layer_norm_lstm) @@ -711,8 +730,8 @@ void CLLSTMLayer::run() } } - CLScheduler::get().enqueue(_copy_cell_state); - CLScheduler::get().enqueue(_copy_output); + CLScheduler::get().enqueue(*_copy_cell_state); + CLScheduler::get().enqueue(*_copy_output); _concat_scratch_buffer.run(); } diff --git a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp index 76a531b1c9..46062387e7 100644 --- a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp +++ b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp @@ -27,6 +27,14 @@ #include "arm_compute/core/Utils.h" 
#include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include diff --git a/src/runtime/CL/functions/CLLaplacianPyramid.cpp b/src/runtime/CL/functions/CLLaplacianPyramid.cpp index 81e903cde8..1ad19e56ea 100644 --- a/src/runtime/CL/functions/CLLaplacianPyramid.cpp +++ b/src/runtime/CL/functions/CLLaplacianPyramid.cpp @@ -32,6 +32,9 @@ #include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h" #include "arm_compute/runtime/CL/functions/CLGaussian5x5.h" #include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLGaussian5x5Kernel.h" +#include "src/core/CL/kernels/CLGaussianPyramidKernel.h" using namespace arm_compute; diff --git a/src/runtime/CL/functions/CLLaplacianReconstruct.cpp b/src/runtime/CL/functions/CLLaplacianReconstruct.cpp index cbb952c3f6..d7fd81754b 100644 --- a/src/runtime/CL/functions/CLLaplacianReconstruct.cpp +++ b/src/runtime/CL/functions/CLLaplacianReconstruct.cpp @@ -23,11 +23,13 @@ */ #include "arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/IPyramid.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" #include diff --git a/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp b/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp index 74cb47347f..04e59ac4a6 100644 --- a/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp @@ -27,6 +27,11 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLCol2ImKernel.h" +#include "src/core/CL/kernels/CLIm2ColKernel.h" +#include "src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h" +#include "src/core/CL/kernels/CLWeightsReshapeKernel.h" +#include "support/MemorySupport.h" #include #include @@ -78,8 +83,16 @@ void calculate_shapes(const ITensorInfo *input, const ITensorInfo *weights, cons } // namespace CLLocallyConnectedLayer::CLLocallyConnectedLayer(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _input_im2col_kernel(), _weights_reshape_kernel(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), _weights_reshaped(), _gemm_output(), - _is_prepared(false), _original_weights(nullptr) + : _memory_group(std::move(memory_manager)), + _input_im2col_kernel(support::cpp14::make_unique()), + _weights_reshape_kernel(support::cpp14::make_unique()), + _mm_kernel(support::cpp14::make_unique()), + _output_col2im_kernel(support::cpp14::make_unique()), + _input_im2col_reshaped(), + _weights_reshaped(), + _gemm_output(), + _is_prepared(false), + _original_weights(nullptr) { } @@ 
-169,16 +182,16 @@ void CLLocallyConnectedLayer::configure(const CLCompileContext &compile_context, _memory_group.manage(&_gemm_output); // Configure kernels - _input_im2col_kernel.configure(compile_context, input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias); - _weights_reshape_kernel.configure(compile_context, weights, biases, &_weights_reshaped); - _mm_kernel.configure(compile_context, &_input_im2col_reshaped, &_weights_reshaped, &_gemm_output); - _output_col2im_kernel.configure(compile_context, &_gemm_output, output, Size2D(conv_w, conv_h)); + _input_im2col_kernel->configure(compile_context, input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias); + _weights_reshape_kernel->configure(compile_context, weights, biases, &_weights_reshaped); + _mm_kernel->configure(compile_context, &_input_im2col_reshaped, &_weights_reshaped, &_gemm_output); + _output_col2im_kernel->configure(compile_context, &_gemm_output, output, Size2D(conv_w, conv_h)); // Allocate intermediate tensors _input_im2col_reshaped.allocator()->allocate(); _gemm_output.allocator()->allocate(); - CLScheduler::get().tune_kernel_static(_input_im2col_kernel); + CLScheduler::get().tune_kernel_static(*_input_im2col_kernel); } void CLLocallyConnectedLayer::run() @@ -188,13 +201,13 @@ void CLLocallyConnectedLayer::run() MemoryGroupResourceScope scope_mg(_memory_group); // Run input reshaping - CLScheduler::get().enqueue(_input_im2col_kernel); + CLScheduler::get().enqueue(*_input_im2col_kernel); // Runs vector matrix multiply on reshaped matrices - CLScheduler::get().enqueue(_mm_kernel); + CLScheduler::get().enqueue(*_mm_kernel); // Reshape output matrix - CLScheduler::get().enqueue(_output_col2im_kernel, false); + CLScheduler::get().enqueue(*_output_col2im_kernel.get(), false); } void CLLocallyConnectedLayer::prepare() @@ -205,7 +218,7 @@ void CLLocallyConnectedLayer::prepare() // Run weights reshaping and mark original weights tensor as unused _weights_reshaped.allocator()->allocate(); - CLScheduler::get().enqueue(_weights_reshape_kernel); + CLScheduler::get().enqueue(*_weights_reshape_kernel); _original_weights->mark_as_unused(); CLScheduler::get().queue().finish(); diff --git a/src/runtime/CL/functions/CLMagnitude.cpp b/src/runtime/CL/functions/CLMagnitude.cpp index 962adadbb2..fb3ebdaa96 100644 --- a/src/runtime/CL/functions/CLMagnitude.cpp +++ b/src/runtime/CL/functions/CLMagnitude.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLMagnitude.h" -#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h" +#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLMaxUnpoolingLayer.cpp b/src/runtime/CL/functions/CLMaxUnpoolingLayer.cpp index 3e32c55067..392bff2b4e 100644 --- a/src/runtime/CL/functions/CLMaxUnpoolingLayer.cpp +++ b/src/runtime/CL/functions/CLMaxUnpoolingLayer.cpp @@ -24,18 +24,23 @@ #include "arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h" +#include "src/core/CL/kernels/CLMemsetKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { CLMaxUnpoolingLayer::CLMaxUnpoolingLayer() - : _memset_kernel(), _unpooling_layer_kernel() + : 
_memset_kernel(support::cpp14::make_unique<CLMemsetKernel>()), + _unpooling_layer_kernel(support::cpp14::make_unique<CLMaxUnpoolingLayerKernel>()) { } +CLMaxUnpoolingLayer::~CLMaxUnpoolingLayer() = default; + void CLMaxUnpoolingLayer::configure(ICLTensor *input, ICLTensor *indices, ICLTensor *output, const PoolingLayerInfo &pool_info) { configure(CLKernelLibrary::get().get_compile_context(), input, indices, output, pool_info); @@ -44,9 +49,9 @@ void CLMaxUnpoolingLayer::configure(ICLTensor *input, ICLTensor *indices, ICLTen void CLMaxUnpoolingLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *indices, ICLTensor *output, const PoolingLayerInfo &pool_info) { const PixelValue zero_value(0.f); - _memset_kernel.configure(output, zero_value); + _memset_kernel->configure(output, zero_value); - _unpooling_layer_kernel.configure(compile_context, input, indices, output, pool_info); + _unpooling_layer_kernel->configure(compile_context, input, indices, output, pool_info); } Status CLMaxUnpoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info) @@ -57,9 +62,9 @@ Status CLMaxUnpoolingLayer::validate(const ITensorInfo *input, const ITensorInfo void CLMaxUnpoolingLayer::run() { // Run memset - CLScheduler::get().enqueue(_memset_kernel, false); + CLScheduler::get().enqueue(*_memset_kernel, false); // Run max unpooling layer - CLScheduler::get().enqueue(_unpooling_layer_kernel); + CLScheduler::get().enqueue(*_unpooling_layer_kernel); } } /* namespace arm_compute */ diff --git a/src/runtime/CL/functions/CLMeanStdDev.cpp b/src/runtime/CL/functions/CLMeanStdDev.cpp index 2517fdc4ef..c91bc954b8 100644 --- a/src/runtime/CL/functions/CLMeanStdDev.cpp +++ b/src/runtime/CL/functions/CLMeanStdDev.cpp @@ -25,6 +25,10 @@ #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/functions/CLMeanStdDev.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLMeanStdDevKernel.h" +#include "src/core/CL/kernels/CLReductionOperationKernel.h" +#include "support/MemorySupport.h" using namespace arm_compute; @@ -39,13 +43,15 @@ CLMeanStdDev::CLMeanStdDev(std::shared_ptr<IMemoryManager> memory_manager) // NO _reduction_output_stddev(), _mean(nullptr), _stddev(nullptr), - _mean_stddev_kernel(), - _fill_border_kernel(), + _mean_stddev_kernel(support::cpp14::make_unique<CLMeanStdDevKernel>()), + _fill_border_kernel(support::cpp14::make_unique<CLFillBorderKernel>()), _global_sum(), _global_sum_squared() { } +CLMeanStdDev::~CLMeanStdDev() = default; + Status CLMeanStdDev::validate(ITensorInfo *input, float *mean, float *stddev) { ARM_COMPUTE_RETURN_ERROR_ON_TENSOR_NOT_2D(input); @@ -101,8 +107,8 @@ void CLMeanStdDev::configure(const CLCompileContext &compile_context, ICLImage * _global_sum_squared = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong)); } - _mean_stddev_kernel.configure(compile_context, input, mean, &_global_sum, stddev, &_global_sum_squared); - _fill_border_kernel.configure(compile_context, input, _mean_stddev_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint8_t>(0))); + _mean_stddev_kernel->configure(compile_context, input, mean, &_global_sum, stddev, &_global_sum_squared); + _fill_border_kernel->configure(compile_context, input, _mean_stddev_kernel->border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint8_t>(0))); } } @@ -149,8 +155,8 @@ void CLMeanStdDev::run_float() void CLMeanStdDev::run_int() { - CLScheduler::get().enqueue(_fill_border_kernel); - 
CLScheduler::get().enqueue(_mean_stddev_kernel); + CLScheduler::get().enqueue(*_fill_border_kernel); + CLScheduler::get().enqueue(*_mean_stddev_kernel); } void CLMeanStdDev::run() diff --git a/src/runtime/CL/functions/CLMeanStdDevNormalizationLayer.cpp b/src/runtime/CL/functions/CLMeanStdDevNormalizationLayer.cpp index 07ab669fde..5b5ff49ecb 100644 --- a/src/runtime/CL/functions/CLMeanStdDevNormalizationLayer.cpp +++ b/src/runtime/CL/functions/CLMeanStdDevNormalizationLayer.cpp @@ -23,8 +23,8 @@ */ #include "arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h" -#include "arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h" #include "arm_compute/core/Types.h" +#include "src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/CL/functions/CLMedian3x3.cpp b/src/runtime/CL/functions/CLMedian3x3.cpp index 92153128f9..2040ebd4f5 100644 --- a/src/runtime/CL/functions/CLMedian3x3.cpp +++ b/src/runtime/CL/functions/CLMedian3x3.cpp @@ -23,8 +23,9 @@ */ #include "arm_compute/runtime/CL/functions/CLMedian3x3.h" -#include "arm_compute/core/CL/kernels/CLMedian3x3Kernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLMedian3x3Kernel.h" #include "support/MemorySupport.h" #include @@ -41,5 +42,5 @@ void CLMedian3x3::configure(const CLCompileContext &compile_context, ICLTensor * auto k = arm_compute::support::cpp14::make_unique(); k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); } diff --git a/src/runtime/CL/functions/CLMinMaxLocation.cpp b/src/runtime/CL/functions/CLMinMaxLocation.cpp index a27defe2f7..3ddd4d04ed 100644 --- a/src/runtime/CL/functions/CLMinMaxLocation.cpp +++ b/src/runtime/CL/functions/CLMinMaxLocation.cpp @@ -22,14 +22,15 @@ * SOFTWARE. 
*/ #include "arm_compute/runtime/CL/functions/CLMinMaxLocation.h" - #include "arm_compute/core/CL/CLHelpers.h" +#include "src/core/CL/kernels/CLMinMaxLocationKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { CLMinMaxLocation::CLMinMaxLocation() - : _min_max_kernel(), - _min_max_loc_kernel(), + : _min_max_kernel(support::cpp14::make_unique()), + _min_max_loc_kernel(support::cpp14::make_unique()), _min_max_vals(), _min_max_count_vals(), _min(nullptr), @@ -41,6 +42,8 @@ CLMinMaxLocation::CLMinMaxLocation() { } +CLMinMaxLocation::~CLMinMaxLocation() = default; + void CLMinMaxLocation::configure(const ICLImage *input, void *min, void *max, CLCoordinates2DArray *min_loc, CLCoordinates2DArray *max_loc, uint32_t *min_count, uint32_t *max_count) { configure(CLKernelLibrary::get().get_compile_context(), input, min, max, min_loc, max_loc, min_count, max_count); @@ -62,16 +65,16 @@ void CLMinMaxLocation::configure(const CLCompileContext &compile_context, const _min_loc = min_loc; _max_loc = max_loc; - _min_max_kernel.configure(compile_context, input, &_min_max_vals); - _min_max_loc_kernel.configure(compile_context, input, &_min_max_vals, &_min_max_count_vals, _min_loc, _max_loc); + _min_max_kernel->configure(compile_context, input, &_min_max_vals); + _min_max_loc_kernel->configure(compile_context, input, &_min_max_vals, &_min_max_count_vals, _min_loc, _max_loc); } void CLMinMaxLocation::run() { cl::CommandQueue q = CLScheduler::get().queue(); - CLScheduler::get().enqueue(_min_max_kernel, false); - CLScheduler::get().enqueue(_min_max_loc_kernel, false); + CLScheduler::get().enqueue(*_min_max_kernel, false); + CLScheduler::get().enqueue(*_min_max_loc_kernel, false); // Update min and max q.enqueueReadBuffer(_min_max_vals, CL_FALSE, 0 * sizeof(int32_t), sizeof(int32_t), static_cast(_min)); diff --git a/src/runtime/CL/functions/CLNonLinearFilter.cpp b/src/runtime/CL/functions/CLNonLinearFilter.cpp index 71f08e8072..3312f6f9a7 100644 --- a/src/runtime/CL/functions/CLNonLinearFilter.cpp +++ b/src/runtime/CL/functions/CLNonLinearFilter.cpp @@ -23,7 +23,8 @@ */ #include "arm_compute/runtime/CL/functions/CLNonLinearFilter.h" -#include "arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLNonLinearFilterKernel.h" #include "support/MemorySupport.h" #include @@ -42,5 +43,5 @@ void CLNonLinearFilter::configure(const CLCompileContext &compile_context, ICLTe auto k = arm_compute::support::cpp14::make_unique(); k->configure(compile_context, input, output, function, mask_size, pattern, mask, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); } diff --git a/src/runtime/CL/functions/CLNonMaximaSuppression3x3.cpp b/src/runtime/CL/functions/CLNonMaximaSuppression3x3.cpp index a79bb0c5a3..22ca176a71 100644 --- a/src/runtime/CL/functions/CLNonMaximaSuppression3x3.cpp +++ b/src/runtime/CL/functions/CLNonMaximaSuppression3x3.cpp @@ -23,7 +23,8 @@ */ #include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h" -#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h" #include "support/MemorySupport.h" #include @@ -43,10 
+44,10 @@ void CLNonMaximaSuppression3x3::configure(const CLCompileContext &compile_contex if(border_mode != BorderMode::UNDEFINED) { - _border_handler.configure(compile_context, input, _kernel->border_size(), BorderMode::CONSTANT); + _border_handler->configure(compile_context, input, _kernel->border_size(), BorderMode::CONSTANT); } else { - _border_handler.configure(compile_context, input, _kernel->border_size(), BorderMode::UNDEFINED); + _border_handler->configure(compile_context, input, _kernel->border_size(), BorderMode::UNDEFINED); } } diff --git a/src/runtime/CL/functions/CLNormalizationLayer.cpp b/src/runtime/CL/functions/CLNormalizationLayer.cpp index 4be6257bbf..40a6cdd2f4 100644 --- a/src/runtime/CL/functions/CLNormalizationLayer.cpp +++ b/src/runtime/CL/functions/CLNormalizationLayer.cpp @@ -25,18 +25,25 @@ #include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h" #include "arm_compute/core/Error.h" +#include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLNormalizationLayerKernel.h" +#include "support/MemorySupport.h" using namespace arm_compute; CLNormalizationLayer::CLNormalizationLayer() - : _norm_kernel(), _border_handler() + : _norm_kernel(support::cpp14::make_unique()), + _border_handler(support::cpp14::make_unique()) { } +CLNormalizationLayer::~CLNormalizationLayer() = default; + void CLNormalizationLayer::configure(ICLTensor *input, ICLTensor *output, const NormalizationLayerInfo &norm_info) { configure(CLKernelLibrary::get().get_compile_context(), input, output, norm_info); @@ -47,10 +54,10 @@ void CLNormalizationLayer::configure(const CLCompileContext &compile_context, IC ARM_COMPUTE_ERROR_ON(input == nullptr); // Configure normalization kernel - _norm_kernel.configure(compile_context, input, output, norm_info); + _norm_kernel->configure(compile_context, input, output, norm_info); // Fill the border by 3 elements since we need vload4 in the IN_MAP normalization kernel - _border_handler.configure(compile_context, input, _norm_kernel.border_size(), BorderMode::CONSTANT, PixelValue()); + _border_handler->configure(compile_context, input, _norm_kernel->border_size(), BorderMode::CONSTANT, PixelValue()); } Status CLNormalizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const NormalizationLayerInfo &norm_info) @@ -61,8 +68,8 @@ Status CLNormalizationLayer::validate(const ITensorInfo *input, const ITensorInf void CLNormalizationLayer::run() { // Run border handler - CLScheduler::get().enqueue(_border_handler, false); + CLScheduler::get().enqueue(*_border_handler, false); // Run normalization kernel - CLScheduler::get().enqueue(_norm_kernel); + CLScheduler::get().enqueue(*_norm_kernel); } diff --git a/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp b/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp index 806e6489a2..9576486db0 100644 --- a/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp +++ b/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp @@ -24,7 +24,7 @@ #include "arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h" -#include "arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h" +#include "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLOpticalFlow.cpp 
b/src/runtime/CL/functions/CLOpticalFlow.cpp index 0b5547eaab..fca6192296 100644 --- a/src/runtime/CL/functions/CLOpticalFlow.cpp +++ b/src/runtime/CL/functions/CLOpticalFlow.cpp @@ -24,7 +24,6 @@ #include "arm_compute/runtime/CL/functions/CLOpticalFlow.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Window.h" @@ -33,6 +32,8 @@ #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "arm_compute/runtime/CL/functions/CLScharr3x3.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLLKTrackerKernel.h" #include "support/MemorySupport.h" using namespace arm_compute; @@ -42,7 +43,7 @@ CLOpticalFlow::CLOpticalFlow(std::shared_ptr memory_manager) // _tracker_init_kernel(), _tracker_stage0_kernel(), _tracker_stage1_kernel(), - _tracker_finalize_kernel(), + _tracker_finalize_kernel(support::cpp14::make_unique()), _func_scharr(), _scharr_gx(), _scharr_gy(), @@ -57,6 +58,8 @@ CLOpticalFlow::CLOpticalFlow(std::shared_ptr memory_manager) // { } +CLOpticalFlow::~CLOpticalFlow() = default; + void CLOpticalFlow::configure(const CLPyramid *old_pyramid, const CLPyramid *new_pyramid, const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLKeyPointArray *new_points, Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, bool use_initial_estimate, @@ -93,9 +96,9 @@ void CLOpticalFlow::configure(const CLCompileContext &compile_context, const CLP const int old_values_list_length = list_length * window_dimension * window_dimension; // Create kernels and tensors - _tracker_init_kernel.resize(_num_levels); - _tracker_stage0_kernel.resize(_num_levels); - _tracker_stage1_kernel.resize(_num_levels); + _tracker_init_kernel.reserve(_num_levels); + _tracker_stage0_kernel.reserve(_num_levels); + _tracker_stage1_kernel.reserve(_num_levels); _func_scharr.resize(_num_levels); _scharr_gx.resize(_num_levels); _scharr_gy.resize(_num_levels); @@ -134,16 +137,19 @@ void CLOpticalFlow::configure(const CLCompileContext &compile_context, const CLP _func_scharr[i].configure(compile_context, old_ith_input, &_scharr_gx[i], &_scharr_gy[i], border_mode, constant_border_value); // Init Lucas-Kanade init kernel - _tracker_init_kernel[i].configure(compile_context, old_points, new_points_estimates, _old_points_internal.get(), _new_points_internal.get(), use_initial_estimate, i, _num_levels, pyr_scale); + _tracker_init_kernel.emplace_back(support::cpp14::make_unique()); + _tracker_init_kernel.back()->configure(compile_context, old_points, new_points_estimates, _old_points_internal.get(), _new_points_internal.get(), use_initial_estimate, i, _num_levels, pyr_scale); // Init Lucas-Kanade stage0 kernel - _tracker_stage0_kernel[i].configure(compile_context, old_ith_input, &_scharr_gx[i], &_scharr_gy[i], - _old_points_internal.get(), _new_points_internal.get(), _coefficient_table.get(), _old_values.get(), - window_dimension, i); + _tracker_stage0_kernel.emplace_back(support::cpp14::make_unique()); + _tracker_stage0_kernel.back()->configure(compile_context, old_ith_input, &_scharr_gx[i], &_scharr_gy[i], + _old_points_internal.get(), _new_points_internal.get(), _coefficient_table.get(), _old_values.get(), + window_dimension, i); // Init Lucas-Kanade stage1 kernel - _tracker_stage1_kernel[i].configure(compile_context, new_ith_input, 
_new_points_internal.get(), _coefficient_table.get(), _old_values.get(), - termination, epsilon, num_iterations, window_dimension, i); + _tracker_stage1_kernel.emplace_back(support::cpp14::make_unique()); + _tracker_stage1_kernel.back()->configure(compile_context, new_ith_input, _new_points_internal.get(), _coefficient_table.get(), _old_values.get(), + termination, epsilon, num_iterations, window_dimension, i); // Allocate intermediate buffers _scharr_gx[i].allocator()->allocate(); @@ -151,7 +157,7 @@ void CLOpticalFlow::configure(const CLCompileContext &compile_context, const CLP } // Finalize Lucas-Kanade - _tracker_finalize_kernel.configure(compile_context, _new_points_internal.get(), new_points); + _tracker_finalize_kernel->configure(compile_context, _new_points_internal.get(), new_points); } void CLOpticalFlow::run() @@ -166,14 +172,14 @@ void CLOpticalFlow::run() _func_scharr[level - 1].run(); // Run Lucas-Kanade init kernel - CLScheduler::get().enqueue(_tracker_init_kernel[level - 1]); + CLScheduler::get().enqueue(*_tracker_init_kernel[level - 1]); // Run Lucas-Kanade stage0 kernel - CLScheduler::get().enqueue(_tracker_stage0_kernel[level - 1]); + CLScheduler::get().enqueue(*_tracker_stage0_kernel[level - 1]); // Run Lucas-Kanade stage1 kernel - CLScheduler::get().enqueue(_tracker_stage1_kernel[level - 1]); + CLScheduler::get().enqueue(*_tracker_stage1_kernel[level - 1]); } - CLScheduler::get().enqueue(_tracker_finalize_kernel, true); + CLScheduler::get().enqueue(*_tracker_finalize_kernel, true); } diff --git a/src/runtime/CL/functions/CLPReluLayer.cpp b/src/runtime/CL/functions/CLPReluLayer.cpp index aaddd46071..60cf4d1a2d 100644 --- a/src/runtime/CL/functions/CLPReluLayer.cpp +++ b/src/runtime/CL/functions/CLPReluLayer.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h" +#include "src/core/CL/kernels/CLElementwiseOperationKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/runtime/CL/CLScheduler.h" diff --git a/src/runtime/CL/functions/CLPadLayer.cpp b/src/runtime/CL/functions/CLPadLayer.cpp index fb6078cc79..388b07b76e 100644 --- a/src/runtime/CL/functions/CLPadLayer.cpp +++ b/src/runtime/CL/functions/CLPadLayer.cpp @@ -22,14 +22,21 @@ * SOFTWARE. 
*/ #include "arm_compute/runtime/CL/functions/CLPadLayer.h" +#include "src/core/CL/kernels/CLCopyKernel.h" +#include "src/core/CL/kernels/CLPadLayerKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { CLPadLayer::CLPadLayer() - : _pad_kernel(), _copy_kernel(), _perform_pad(false) + : _pad_kernel(support::cpp14::make_unique<CLPadLayerKernel>()), + _copy_kernel(support::cpp14::make_unique<CLCopyKernel>()), + _perform_pad(false) { } +CLPadLayer::~CLPadLayer() = default; + void CLPadLayer::configure(ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode) { configure(CLKernelLibrary::get().get_compile_context(), input, output, padding, constant_value, mode); @@ -46,12 +53,12 @@ void CLPadLayer::configure(const CLCompileContext &compile_context, ICLTensor *i if(_perform_pad) { - _pad_kernel.configure(compile_context, input, output, padding, constant_value, mode); + _pad_kernel->configure(compile_context, input, output, padding, constant_value, mode); } else { // Copy the input to the whole output if no padding is applied - _copy_kernel.configure(compile_context, input, output); + _copy_kernel->configure(compile_context, input, output); } } Status CLPadLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode) @@ -75,11 +82,11 @@ void CLPadLayer::run() { if(_perform_pad) { - CLScheduler::get().enqueue(_pad_kernel); + CLScheduler::get().enqueue(*_pad_kernel); } else { - CLScheduler::get().enqueue(_copy_kernel); + CLScheduler::get().enqueue(*_copy_kernel); } } } // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/CL/functions/CLPermute.cpp b/src/runtime/CL/functions/CLPermute.cpp index e13046bd46..f7f0bc4f5d 100644 --- a/src/runtime/CL/functions/CLPermute.cpp +++ b/src/runtime/CL/functions/CLPermute.cpp @@ -24,8 +24,8 @@ #include "arm_compute/runtime/CL/functions/CLPermute.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLPermuteKernel.h" #include "arm_compute/core/Error.h" +#include "src/core/CL/kernels/CLPermuteKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/CL/functions/CLPhase.cpp b/src/runtime/CL/functions/CLPhase.cpp index 64d2e0fdff..6594cd5bac 100644 --- a/src/runtime/CL/functions/CLPhase.cpp +++ b/src/runtime/CL/functions/CLPhase.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLPhase.h" -#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h" +#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp b/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp index 883ce68536..12cc5d60af 100644 --- a/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp +++ b/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp @@ -24,8 +24,9 @@ #include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h" #include "support/MemorySupport.h" #include @@ -55,7 +56,7 @@ ITensorPack select_border_input(ITensorPack &tensors) namespace experimental { CLPixelWiseMultiplication::CLPixelWiseMultiplication() - : _border_handler() + : 
_border_handler(support::cpp14::make_unique()) { } @@ -72,7 +73,7 @@ void CLPixelWiseMultiplication::configure(const CLCompileContext &compile_contex if(broadcasted_info->dimension(0) == 1) { - _border_handler.configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); + _border_handler->configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); } } } @@ -86,12 +87,12 @@ Status CLPixelWiseMultiplication::validate(const ITensorInfo *input1, const ITen void CLPixelWiseMultiplication::run(ITensorPack &tensors) { auto border_pack = select_border_input(tensors); - CLScheduler::get().enqueue_op(_border_handler, border_pack); + CLScheduler::get().enqueue_op(*_border_handler, border_pack); ICLOperator::run(tensors); } CLComplexPixelWiseMultiplication::CLComplexPixelWiseMultiplication() - : _border_handler() + : _border_handler(support::cpp14::make_unique()) { } @@ -107,7 +108,7 @@ void CLComplexPixelWiseMultiplication::configure(const CLCompileContext &compile if(broadcasted_info->dimension(0) == 1) { - _border_handler.configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); + _border_handler->configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); } } } @@ -120,7 +121,7 @@ Status CLComplexPixelWiseMultiplication::validate(const ITensorInfo *input1, con void CLComplexPixelWiseMultiplication::run(ITensorPack &tensors) { auto border_pack = select_border_input(tensors); - CLScheduler::get().enqueue_op(_border_handler, border_pack); + CLScheduler::get().enqueue_op(*_border_handler, border_pack); ICLOperator::run(tensors); } } // namespace experimental diff --git a/src/runtime/CL/functions/CLPoolingLayer.cpp b/src/runtime/CL/functions/CLPoolingLayer.cpp index a14818fffe..7f99aee9ba 100644 --- a/src/runtime/CL/functions/CLPoolingLayer.cpp +++ b/src/runtime/CL/functions/CLPoolingLayer.cpp @@ -24,8 +24,9 @@ #include "arm_compute/runtime/CL/functions/CLPoolingLayer.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLPoolingLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute @@ -79,7 +80,7 @@ void CLPoolingLayer::configure(const CLCompileContext &compile_context, ICLTenso default: ARM_COMPUTE_ERROR("Data layout not supported"); } - _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, pixel_value); + _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, pixel_value); // Tune kernels CLScheduler::get().tune_kernel_static(*_kernel); diff --git a/src/runtime/CL/functions/CLPriorBoxLayer.cpp b/src/runtime/CL/functions/CLPriorBoxLayer.cpp index fefbff639d..8cb971793e 100644 --- a/src/runtime/CL/functions/CLPriorBoxLayer.cpp +++ b/src/runtime/CL/functions/CLPriorBoxLayer.cpp @@ -24,13 +24,13 @@ #include "arm_compute/runtime/CL/functions/CLPriorBoxLayer.h" -#include "arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" - +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLPriorBoxLayerKernel.h" #include "support/MemorySupport.h" using namespace arm_compute; diff --git 
a/src/runtime/CL/functions/CLQLSTMLayer.cpp b/src/runtime/CL/functions/CLQLSTMLayer.cpp index 2d21d210e4..54df5a0a5e 100644 --- a/src/runtime/CL/functions/CLQLSTMLayer.cpp +++ b/src/runtime/CL/functions/CLQLSTMLayer.cpp @@ -30,7 +30,18 @@ #include "arm_compute/core/utils/misc/InfoHelpers.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLCopyKernel.h" +#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" +#include "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h" #include "src/core/helpers/WindowHelpers.h" +#include "support/MemorySupport.h" namespace arm_compute { @@ -86,10 +97,50 @@ void CLQLSTMLayer::TensorCopyKernel::run() } CLQLSTMLayer::CLQLSTMLayer(std::shared_ptr memory_manager) + : _input_to_input_reduction(support::cpp14::make_unique()), + _recurrent_to_input_reduction(support::cpp14::make_unique()), + _input_to_forget_reduction(support::cpp14::make_unique()), + _recurrent_to_forget_reduction(support::cpp14::make_unique()), + _input_to_cell_reduction(support::cpp14::make_unique()), + _recurrent_to_cell_reduction(support::cpp14::make_unique()), + _input_to_output_reduction(support::cpp14::make_unique()), + _recurrent_to_output_reduction(support::cpp14::make_unique()), + _projection_reduction(support::cpp14::make_unique()), + _layer_norms(), + _copy_output(support::cpp14::make_unique()) { + for(auto &norm : _layer_norms) + { + norm = support::cpp14::make_unique(); + } + _memory_group = MemoryGroup(std::move(memory_manager)); } +CLQLSTMLayer::~CLQLSTMLayer() = default; + +void CLQLSTMLayer::configure_layer_norm(LayerNormGate g, const ICLTensor *in) +{ + ARM_COMPUTE_ERROR_ON(!_has_layer_norm); + + CLTensor *out = &get_layer_norm_output(g); + _memory_group.manage(out); + out->allocator()->init(*(in->info())); + + get_layer_norm(g).configure(in, out, get_layer_norm_weight(g), get_layer_norm_bias(g)); +} + +Status CLQLSTMLayer::validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias) +{ + // Output quantization scale will be different, but ignored here + // since it will be configured at configure() stage. 
+ const TensorInfo out + { + in + }; + return CLQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias); +} + void CLQLSTMLayer::configure_mm(const CLCompileContext &compile_context, CLGEMMLowpMatrixMultiplyCore &mm, CLGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info, const ICLTensor *mm_input, const ICLTensor *mm_weights, const ICLTensor *bias, CLTensor *mm_res, CLTensor *outstage_res, float gemmlowp_scale, @@ -200,18 +251,18 @@ void CLQLSTMLayer::configure(const CLCompileContext &compile_context, const ICLT _input_to_input_weights = lstm_params.input_to_input_weights(); _recurrent_to_input_weights = lstm_params.recurrent_to_input_weights(); - _input_to_input_reduction.configure(compile_context, _input_to_input_weights, &_input_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); - _recurrent_to_input_reduction.configure(compile_context, _recurrent_to_input_weights, &_recurrent_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)); + _input_to_input_reduction->configure(compile_context, _input_to_input_weights, &_input_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); + _recurrent_to_input_reduction->configure(compile_context, _recurrent_to_input_weights, &_recurrent_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)); } - _input_to_forget_reduction.configure(compile_context, input_to_forget_weights, &_input_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); - _recurrent_to_forget_reduction.configure(compile_context, recurrent_to_forget_weights, &_recurrent_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)); - _input_to_cell_reduction.configure(compile_context, input_to_cell_weights, &_input_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); - _recurrent_to_cell_reduction.configure(compile_context, recurrent_to_cell_weights, &_recurrent_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)); - _input_to_output_reduction.configure(compile_context, input_to_output_weights, &_input_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); - _recurrent_to_output_reduction.configure(compile_context, recurrent_to_output_weights, &_recurrent_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)); + _input_to_forget_reduction->configure(compile_context, input_to_forget_weights, &_input_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); + _recurrent_to_forget_reduction->configure(compile_context, recurrent_to_forget_weights, &_recurrent_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)); + _input_to_cell_reduction->configure(compile_context, input_to_cell_weights, &_input_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); + _recurrent_to_cell_reduction->configure(compile_context, recurrent_to_cell_weights, &_recurrent_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)); + _input_to_output_reduction->configure(compile_context, input_to_output_weights, &_input_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); + 
_recurrent_to_output_reduction->configure(compile_context, recurrent_to_output_weights, &_recurrent_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)); if(_has_projection) { - _projection_reduction.configure(compile_context, _projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true)); + _projection_reduction->configure(compile_context, _projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true)); if(_projection_bias != nullptr) { _projection_bias_add.configure(compile_context, _projection_bias, &_projection_eff_bias, &_projection_eff_bias, ConvertPolicy::SATURATE); @@ -543,7 +594,7 @@ void CLQLSTMLayer::configure(const CLCompileContext &compile_context, const ICLT } // Copy output_state_out to output - _copy_output.configure(compile_context, output_state_out, output); + _copy_output->configure(compile_context, output_state_out, output); } Status CLQLSTMLayer::validate(const ITensorInfo *input, @@ -1049,7 +1100,7 @@ void CLQLSTMLayer::run() } // Copy output_state_out to output - CLScheduler::get().enqueue(_copy_output); + CLScheduler::get().enqueue(*_copy_output); } void CLQLSTMLayer::prepare() @@ -1081,8 +1132,8 @@ void CLQLSTMLayer::prepare() { _input_to_input_eff_bias.allocator()->allocate(); _recurrent_to_input_eff_bias.allocator()->allocate(); - CLScheduler::get().enqueue(_input_to_input_reduction); - CLScheduler::get().enqueue(_recurrent_to_input_reduction); + CLScheduler::get().enqueue(*_input_to_input_reduction); + CLScheduler::get().enqueue(*_recurrent_to_input_reduction); _input_to_input_weights_transposed.allocator()->allocate(); _recurrent_to_input_weights_transposed.allocator()->allocate(); @@ -1097,17 +1148,17 @@ void CLQLSTMLayer::prepare() _recurrent_to_cell_eff_bias.allocator()->allocate(); _input_to_output_eff_bias.allocator()->allocate(); _recurrent_to_output_eff_bias.allocator()->allocate(); - CLScheduler::get().enqueue(_input_to_forget_reduction); - CLScheduler::get().enqueue(_recurrent_to_forget_reduction); - CLScheduler::get().enqueue(_input_to_cell_reduction); - CLScheduler::get().enqueue(_recurrent_to_cell_reduction); - CLScheduler::get().enqueue(_input_to_output_reduction); - CLScheduler::get().enqueue(_recurrent_to_output_reduction); + CLScheduler::get().enqueue(*_input_to_forget_reduction); + CLScheduler::get().enqueue(*_recurrent_to_forget_reduction); + CLScheduler::get().enqueue(*_input_to_cell_reduction); + CLScheduler::get().enqueue(*_recurrent_to_cell_reduction); + CLScheduler::get().enqueue(*_input_to_output_reduction); + CLScheduler::get().enqueue(*_recurrent_to_output_reduction); if(_has_projection) { _projection_eff_bias.allocator()->allocate(); - CLScheduler::get().enqueue(_projection_reduction); + CLScheduler::get().enqueue(*_projection_reduction); if(_projection_bias != nullptr) { _projection_bias_add.run(); diff --git a/src/runtime/CL/functions/CLQuantizationLayer.cpp b/src/runtime/CL/functions/CLQuantizationLayer.cpp index f0a446acab..f132547eb9 100644 --- a/src/runtime/CL/functions/CLQuantizationLayer.cpp +++ b/src/runtime/CL/functions/CLQuantizationLayer.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLQuantizationLayer.h" -#include "arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h" +#include "src/core/CL/kernels/CLQuantizationLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git 
a/src/runtime/CL/functions/CLRNNLayer.cpp b/src/runtime/CL/functions/CLRNNLayer.cpp index 94e7f9440c..be3e539f98 100644 --- a/src/runtime/CL/functions/CLRNNLayer.cpp +++ b/src/runtime/CL/functions/CLRNNLayer.cpp @@ -28,17 +28,33 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLCopyKernel.h" +#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { using namespace arm_compute::misc::shape_calculator; CLRNNLayer::CLRNNLayer(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), _activation(), _fully_connected_kernel(), _copy_kernel(), _fully_connected_out(), _gemm_output(), _add_output(), - _is_prepared(false) + : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), _activation(), _fully_connected_kernel(), _copy_kernel(support::cpp14::make_unique()), _fully_connected_out(), + _gemm_output(), _add_output(), _is_prepared(false) { } +CLRNNLayer::~CLRNNLayer() = default; + Status CLRNNLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *recurrent_weights, const ITensorInfo *bias, const ITensorInfo *hidden_state, const ITensorInfo *output, const ActivationLayerInfo &info) { @@ -107,7 +123,7 @@ void CLRNNLayer::configure(const CLCompileContext &compile_context, const ICLTen _activation.configure(compile_context, &_add_output, hidden_state, info); _add_output.allocator()->allocate(); - _copy_kernel.configure(compile_context, hidden_state, output); + _copy_kernel->configure(compile_context, hidden_state, output); } void CLRNNLayer::run() @@ -122,7 +138,7 @@ void CLRNNLayer::run() _activation.run(); // copy hidden out to output - CLScheduler::get().enqueue(_copy_kernel); + CLScheduler::get().enqueue(*_copy_kernel); } void CLRNNLayer::prepare() diff --git a/src/runtime/CL/functions/CLROIAlignLayer.cpp b/src/runtime/CL/functions/CLROIAlignLayer.cpp index 2337cee33f..cf28a1a0fb 100644 --- a/src/runtime/CL/functions/CLROIAlignLayer.cpp +++ b/src/runtime/CL/functions/CLROIAlignLayer.cpp @@ -24,7 +24,8 @@ #include "arm_compute/runtime/CL/functions/CLROIAlignLayer.h" #include "arm_compute/core/CL/ICLArray.h" -#include "arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h" +#include "src/core/CL/kernels/CLROIAlignLayerKernel.h" +#include "src/core/CL/kernels/CLROIPoolingLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/CL/functions/CLROIPoolingLayer.cpp b/src/runtime/CL/functions/CLROIPoolingLayer.cpp index cdf60ce04f..b0e6716cce 100644 --- a/src/runtime/CL/functions/CLROIPoolingLayer.cpp +++ 
b/src/runtime/CL/functions/CLROIPoolingLayer.cpp @@ -22,10 +22,8 @@ * SOFTWARE. */ #include "arm_compute/runtime/CL/functions/CLROIPoolingLayer.h" - #include "arm_compute/core/CL/ICLArray.h" - -#include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h" +#include "src/core/CL/kernels/CLROIPoolingLayerKernel.h" #include "support/MemorySupport.h" using namespace arm_compute; diff --git a/src/runtime/CL/functions/CLRange.cpp b/src/runtime/CL/functions/CLRange.cpp index 8bf2a0c43e..57b57bd305 100644 --- a/src/runtime/CL/functions/CLRange.cpp +++ b/src/runtime/CL/functions/CLRange.cpp @@ -24,10 +24,10 @@ #include "arm_compute/runtime/CL/functions/CLRange.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLRangeKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLRangeKernel.h" #include "support/MemorySupport.h" using namespace arm_compute; diff --git a/src/runtime/CL/functions/CLReduceMean.cpp b/src/runtime/CL/functions/CLReduceMean.cpp index 4ea7f7642f..b761dc2f99 100644 --- a/src/runtime/CL/functions/CLReduceMean.cpp +++ b/src/runtime/CL/functions/CLReduceMean.cpp @@ -24,11 +24,12 @@ #include "arm_compute/runtime/CL/functions/CLReduceMean.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/CL/CLValidate.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLReductionOperationKernel.h" #include "src/core/helpers/AutoConfiguration.h" namespace arm_compute diff --git a/src/runtime/CL/functions/CLReductionOperation.cpp b/src/runtime/CL/functions/CLReductionOperation.cpp index 208371c45d..7423f4bc87 100644 --- a/src/runtime/CL/functions/CLReductionOperation.cpp +++ b/src/runtime/CL/functions/CLReductionOperation.cpp @@ -30,9 +30,10 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLReductionOperationKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/runtime/Utils.h" - #include "support/MemorySupport.h" namespace arm_compute @@ -43,6 +44,8 @@ CLReductionOperation::CLReductionOperation(std::shared_ptr memor { } +CLReductionOperation::~CLReductionOperation() = default; + Status CLReductionOperation::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, bool keep_dims) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -211,7 +214,7 @@ void CLReductionOperation::configure(const CLCompileContext &compile_context, IC } // Configure reduction operation kernels - _reduction_kernels_vector.resize(_num_of_stages); + _reduction_kernels_vector.reserve(_num_of_stages); // Create temporary tensors if(_is_serial) @@ -221,11 +224,12 @@ void CLReductionOperation::configure(const CLCompileContext &compile_context, IC _memory_group.manage(&_results_vector.back()); } - _reduction_kernels_vector[0].configure(compile_context, input, output_internal, axis, op, 0); + _reduction_kernels_vector.emplace_back(support::cpp14::make_unique()); + _reduction_kernels_vector[0]->configure(compile_context, input, output_internal, axis, op, 0); } else { - 
_border_handlers_vector.resize(_num_of_stages); + _border_handlers_vector.reserve(_num_of_stages); _memory_group.manage(&_results_vector[0]); ReductionOperation first_kernel_op; @@ -269,15 +273,23 @@ void CLReductionOperation::configure(const CLCompileContext &compile_context, IC ARM_COMPUTE_ERROR("Not supported"); } - _reduction_kernels_vector[0].configure(compile_context, input, &_results_vector[0], axis, first_kernel_op); - _border_handlers_vector[0].configure(compile_context, input, _reduction_kernels_vector[0].border_size(), BorderMode::CONSTANT, pixelValue); + _reduction_kernels_vector.emplace_back(support::cpp14::make_unique()); + _reduction_kernels_vector[0]->configure(compile_context, input, &_results_vector[0], axis, first_kernel_op); + + _border_handlers_vector.emplace_back(support::cpp14::make_unique()); + _border_handlers_vector[0]->configure(compile_context, input, _reduction_kernels_vector[0]->border_size(), BorderMode::CONSTANT, pixelValue); // Apply ReductionOperation on intermediate stages for(unsigned int i = 1; i < _num_of_stages - 1; ++i) { _memory_group.manage(&_results_vector[i]); - _reduction_kernels_vector[i].configure(compile_context, &_results_vector[i - 1], &_results_vector[i], axis, intermediate_kernel_op); - _border_handlers_vector[i].configure(compile_context, &_results_vector[i - 1], _reduction_kernels_vector[i].border_size(), BorderMode::CONSTANT, pixelValue); + + _reduction_kernels_vector.emplace_back(support::cpp14::make_unique()); + _reduction_kernels_vector[i]->configure(compile_context, &_results_vector[i - 1], &_results_vector[i], axis, intermediate_kernel_op); + + _border_handlers_vector.emplace_back(support::cpp14::make_unique()); + _border_handlers_vector[i]->configure(compile_context, &_results_vector[i - 1], _reduction_kernels_vector[i]->border_size(), BorderMode::CONSTANT, pixelValue); + _results_vector[i - 1].allocator()->allocate(); } @@ -290,8 +302,12 @@ void CLReductionOperation::configure(const CLCompileContext &compile_context, IC _memory_group.manage(&_results_vector.back()); } - _reduction_kernels_vector[last_stage].configure(compile_context, &_results_vector[last_stage - 1], output_internal, axis, last_kernel_op, input_width); - _border_handlers_vector[last_stage].configure(compile_context, &_results_vector[last_stage - 1], _reduction_kernels_vector[last_stage].border_size(), BorderMode::CONSTANT, pixelValue); + _reduction_kernels_vector.emplace_back(support::cpp14::make_unique()); + _reduction_kernels_vector[last_stage]->configure(compile_context, &_results_vector[last_stage - 1], output_internal, axis, last_kernel_op, input_width); + + _border_handlers_vector.emplace_back(support::cpp14::make_unique()); + _border_handlers_vector[last_stage]->configure(compile_context, &_results_vector[last_stage - 1], _reduction_kernels_vector[last_stage]->border_size(), BorderMode::CONSTANT, pixelValue); + _results_vector[last_stage - 1].allocator()->allocate(); } @@ -308,14 +324,14 @@ void CLReductionOperation::run() if(_is_serial) { - CLScheduler::get().enqueue(_reduction_kernels_vector[0], false); + CLScheduler::get().enqueue(*_reduction_kernels_vector[0], false); } else { for(unsigned int i = 0; i < _num_of_stages; ++i) { - CLScheduler::get().enqueue(_border_handlers_vector[i], false); - CLScheduler::get().enqueue(_reduction_kernels_vector[i], false); + CLScheduler::get().enqueue(*_border_handlers_vector[i], false); + CLScheduler::get().enqueue(*_reduction_kernels_vector[i], false); } } diff --git a/src/runtime/CL/functions/CLRemap.cpp 
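The CLReductionOperation hunks above switch _reduction_kernels_vector and _border_handlers_vector from vectors of kernel objects sized with resize() to vectors of unique_ptr filled with reserve() plus emplace_back(make_unique(...)). A small self-contained sketch of that container pattern; Stage is a placeholder type, not a Compute Library class:

#include <iostream>
#include <memory>
#include <vector>

struct Stage // placeholder for a kernel such as the per-stage reduction kernel
{
    void configure(int i) { id = i; }
    int  id{ -1 };
};

int main()
{
    const unsigned int num_stages = 3;

    std::vector<std::unique_ptr<Stage>> stages;
    stages.reserve(num_stages); // resize() would only create null pointers up front

    for(unsigned int i = 0; i < num_stages; ++i)
    {
        stages.emplace_back(std::make_unique<Stage>());
        stages.back()->configure(static_cast<int>(i)); // configure right after creating the stage
    }

    for(const auto &s : stages)
    {
        std::cout << "stage " << s->id << " configured\n";
    }
    return 0;
}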
b/src/runtime/CL/functions/CLRemap.cpp index 1e3d614402..6466c2843b 100644 --- a/src/runtime/CL/functions/CLRemap.cpp +++ b/src/runtime/CL/functions/CLRemap.cpp @@ -24,11 +24,12 @@ #include "arm_compute/runtime/CL/functions/CLRemap.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLRemapKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLRemapKernel.h" #include "support/MemorySupport.h" #include @@ -53,5 +54,5 @@ void CLRemap::configure(const CLCompileContext &compile_context, ICLTensor *inpu auto k = arm_compute::support::cpp14::make_unique(); k->configure(compile_context, input, map_x, map_y, output, policy, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); } diff --git a/src/runtime/CL/functions/CLReorgLayer.cpp b/src/runtime/CL/functions/CLReorgLayer.cpp index 1dc41aefb5..4b2f70334f 100644 --- a/src/runtime/CL/functions/CLReorgLayer.cpp +++ b/src/runtime/CL/functions/CLReorgLayer.cpp @@ -24,10 +24,10 @@ #include "arm_compute/runtime/CL/functions/CLReorgLayer.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLReorgLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" +#include "src/core/CL/kernels/CLReorgLayerKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLReshapeLayer.cpp b/src/runtime/CL/functions/CLReshapeLayer.cpp index 273a761a0a..5112064b23 100644 --- a/src/runtime/CL/functions/CLReshapeLayer.cpp +++ b/src/runtime/CL/functions/CLReshapeLayer.cpp @@ -24,7 +24,7 @@ #include "arm_compute/runtime/CL/functions/CLReshapeLayer.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h" +#include "src/core/CL/kernels/CLReshapeLayerKernel.h" #include "support/MemorySupport.h" /** [CLReshapeLayer snippet] **/ diff --git a/src/runtime/CL/functions/CLReverse.cpp b/src/runtime/CL/functions/CLReverse.cpp index 213fbc8f32..b73d8de62e 100644 --- a/src/runtime/CL/functions/CLReverse.cpp +++ b/src/runtime/CL/functions/CLReverse.cpp @@ -23,8 +23,8 @@ */ #include "arm_compute/runtime/CL/functions/CLReverse.h" -#include "arm_compute/core/CL/kernels/CLReverseKernel.h" #include "arm_compute/core/Types.h" +#include "src/core/CL/kernels/CLReverseKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/CL/functions/CLScale.cpp b/src/runtime/CL/functions/CLScale.cpp index e111c6d1f7..383b0cc305 100644 --- a/src/runtime/CL/functions/CLScale.cpp +++ b/src/runtime/CL/functions/CLScale.cpp @@ -24,10 +24,11 @@ #include "arm_compute/runtime/CL/functions/CLScale.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLScaleKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLScaleKernel.h" #include "support/MemorySupport.h" namespace arm_compute @@ -60,7 +61,7 @@ void CLScale::configure(const 
CLCompileContext &compile_context, ICLTensor *inpu { border_mode_to_use = BorderMode::CONSTANT; } - _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode_to_use, info.constant_border_value); + _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode_to_use, info.constant_border_value); } void CLScale::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, diff --git a/src/runtime/CL/functions/CLScharr3x3.cpp b/src/runtime/CL/functions/CLScharr3x3.cpp index b121ee7b99..e5d0d2d630 100644 --- a/src/runtime/CL/functions/CLScharr3x3.cpp +++ b/src/runtime/CL/functions/CLScharr3x3.cpp @@ -23,8 +23,9 @@ */ #include "arm_compute/runtime/CL/functions/CLScharr3x3.h" -#include "arm_compute/core/CL/kernels/CLScharr3x3Kernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLScharr3x3Kernel.h" #include "support/MemorySupport.h" #include @@ -41,5 +42,5 @@ void CLScharr3x3::configure(const CLCompileContext &compile_context, ICLTensor * auto k = arm_compute::support::cpp14::make_unique(); k->configure(compile_context, input, output_x, output_y, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); } diff --git a/src/runtime/CL/functions/CLSelect.cpp b/src/runtime/CL/functions/CLSelect.cpp index ef8010847b..374da91b78 100644 --- a/src/runtime/CL/functions/CLSelect.cpp +++ b/src/runtime/CL/functions/CLSelect.cpp @@ -23,9 +23,9 @@ */ #include "arm_compute/runtime/CL/functions/CLSelect.h" -#include "arm_compute/core/CL/kernels/CLSelectKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLSelectKernel.h" #include "support/MemorySupport.h" diff --git a/src/runtime/CL/functions/CLSlice.cpp b/src/runtime/CL/functions/CLSlice.cpp index f36550ba91..940540563a 100644 --- a/src/runtime/CL/functions/CLSlice.cpp +++ b/src/runtime/CL/functions/CLSlice.cpp @@ -24,9 +24,9 @@ #include "arm_compute/runtime/CL/functions/CLSlice.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/helpers/tensor_transform.h" +#include "src/core/CL/kernels/CLStridedSliceKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/CL/functions/CLSobel3x3.cpp b/src/runtime/CL/functions/CLSobel3x3.cpp index 566a4a1534..78376f935a 100644 --- a/src/runtime/CL/functions/CLSobel3x3.cpp +++ b/src/runtime/CL/functions/CLSobel3x3.cpp @@ -23,14 +23,17 @@ */ #include "arm_compute/runtime/CL/functions/CLSobel3x3.h" -#include "arm_compute/core/CL/kernels/CLSobel3x3Kernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLSobel3x3Kernel.h" #include "support/MemorySupport.h" #include using namespace arm_compute; +CLSobel3x3::~CLSobel3x3() = default; + void CLSobel3x3::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value) { configure(CLKernelLibrary::get().get_compile_context(), 
input, output_x, output_y, border_mode, constant_border_value); @@ -41,5 +44,5 @@ void CLSobel3x3::configure(const CLCompileContext &compile_context, ICLTensor *i auto k = arm_compute::support::cpp14::make_unique(); k->configure(compile_context, input, output_x, output_y, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); } diff --git a/src/runtime/CL/functions/CLSobel5x5.cpp b/src/runtime/CL/functions/CLSobel5x5.cpp index f70e4f36f5..fa5d8945fb 100644 --- a/src/runtime/CL/functions/CLSobel5x5.cpp +++ b/src/runtime/CL/functions/CLSobel5x5.cpp @@ -24,20 +24,29 @@ #include "arm_compute/runtime/CL/functions/CLSobel5x5.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/ITensorAllocator.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLSobel5x5Kernel.h" +#include "support/MemorySupport.h" using namespace arm_compute; CLSobel5x5::CLSobel5x5(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _sobel_hor(), _sobel_vert(), _border_handler(), _tmp_x(), _tmp_y() + : _memory_group(std::move(memory_manager)), + _sobel_hor(support::cpp14::make_unique()), + _sobel_vert(support::cpp14::make_unique()), + _border_handler(support::cpp14::make_unique()), + _tmp_x(), + _tmp_y() { } +CLSobel5x5::~CLSobel5x5() = default; + void CLSobel5x5::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value) { configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_mode, constant_border_value); @@ -58,8 +67,8 @@ void CLSobel5x5::configure(const CLCompileContext &compile_context, ICLTensor *i _tmp_y.allocator()->init(tensor_info); _memory_group.manage(&_tmp_x); _memory_group.manage(&_tmp_y); - _sobel_hor.configure(compile_context, input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED); - _sobel_vert.configure(compile_context, &_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED); + _sobel_hor->configure(compile_context, input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED); + _sobel_vert->configure(compile_context, &_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED); _tmp_x.allocator()->allocate(); _tmp_y.allocator()->allocate(); } @@ -67,27 +76,27 @@ void CLSobel5x5::configure(const CLCompileContext &compile_context, ICLTensor *i { _tmp_x.allocator()->init(tensor_info); _memory_group.manage(&_tmp_x); - _sobel_hor.configure(compile_context, input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED); - _sobel_vert.configure(compile_context, &_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED); + _sobel_hor->configure(compile_context, input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED); + _sobel_vert->configure(compile_context, &_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED); _tmp_x.allocator()->allocate(); } else if(run_sobel_y) { _tmp_y.allocator()->init(tensor_info); _memory_group.manage(&_tmp_y); - 
_sobel_hor.configure(compile_context, input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED); - _sobel_vert.configure(compile_context, nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED); + _sobel_hor->configure(compile_context, input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED); + _sobel_vert->configure(compile_context, nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED); _tmp_y.allocator()->allocate(); } - _border_handler.configure(compile_context, input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler->configure(compile_context, input, _sobel_hor->border_size(), border_mode, PixelValue(constant_border_value)); } void CLSobel5x5::run() { - CLScheduler::get().enqueue(_border_handler, false); + CLScheduler::get().enqueue(*_border_handler, false); MemoryGroupResourceScope scope_mg(_memory_group); - CLScheduler::get().enqueue(_sobel_hor, false); - CLScheduler::get().enqueue(_sobel_vert); + CLScheduler::get().enqueue(*_sobel_hor, false); + CLScheduler::get().enqueue(*_sobel_vert); } diff --git a/src/runtime/CL/functions/CLSobel7x7.cpp b/src/runtime/CL/functions/CLSobel7x7.cpp index 792432e841..f462adb0ed 100644 --- a/src/runtime/CL/functions/CLSobel7x7.cpp +++ b/src/runtime/CL/functions/CLSobel7x7.cpp @@ -24,20 +24,29 @@ #include "arm_compute/runtime/CL/functions/CLSobel7x7.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/ITensorAllocator.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLSobel7x7Kernel.h" +#include "support/MemorySupport.h" using namespace arm_compute; CLSobel7x7::CLSobel7x7(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _sobel_hor(), _sobel_vert(), _border_handler(), _tmp_x(), _tmp_y() + : _memory_group(std::move(memory_manager)), + _sobel_hor(support::cpp14::make_unique()), + _sobel_vert(support::cpp14::make_unique()), + _border_handler(support::cpp14::make_unique()), + _tmp_x(), + _tmp_y() { } +CLSobel7x7::~CLSobel7x7() = default; + void CLSobel7x7::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value) { configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_mode, constant_border_value); @@ -58,8 +67,8 @@ void CLSobel7x7::configure(const CLCompileContext &compile_context, ICLTensor *i _tmp_y.allocator()->init(tensor_info); _memory_group.manage(&_tmp_x); _memory_group.manage(&_tmp_y); - _sobel_hor.configure(compile_context, input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED); - _sobel_vert.configure(compile_context, &_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED); + _sobel_hor->configure(compile_context, input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED); + _sobel_vert->configure(compile_context, &_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED); _tmp_x.allocator()->allocate(); _tmp_y.allocator()->allocate(); } @@ -67,27 +76,27 @@ void CLSobel7x7::configure(const CLCompileContext &compile_context, ICLTensor *i { _tmp_x.allocator()->init(tensor_info); _memory_group.manage(&_tmp_x); - _sobel_hor.configure(compile_context, input, &_tmp_x, nullptr, border_mode == 
BorderMode::UNDEFINED); - _sobel_vert.configure(compile_context, &_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED); + _sobel_hor->configure(compile_context, input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED); + _sobel_vert->configure(compile_context, &_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED); _tmp_x.allocator()->allocate(); } else if(run_sobel_y) { _tmp_y.allocator()->init(tensor_info); _memory_group.manage(&_tmp_y); - _sobel_hor.configure(compile_context, input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED); - _sobel_vert.configure(compile_context, nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED); + _sobel_hor->configure(compile_context, input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED); + _sobel_vert->configure(compile_context, nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED); _tmp_y.allocator()->allocate(); } - _border_handler.configure(compile_context, input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler->configure(compile_context, input, _sobel_hor->border_size(), border_mode, PixelValue(constant_border_value)); } void CLSobel7x7::run() { - CLScheduler::get().enqueue(_border_handler, false); + CLScheduler::get().enqueue(*_border_handler, false); MemoryGroupResourceScope scope_mg(_memory_group); - CLScheduler::get().enqueue(_sobel_hor, false); - CLScheduler::get().enqueue(_sobel_vert); + CLScheduler::get().enqueue(*_sobel_hor, false); + CLScheduler::get().enqueue(*_sobel_vert); } diff --git a/src/runtime/CL/functions/CLSoftmaxLayer.cpp b/src/runtime/CL/functions/CLSoftmaxLayer.cpp index 759c8706a1..4caf91488e 100644 --- a/src/runtime/CL/functions/CLSoftmaxLayer.cpp +++ b/src/runtime/CL/functions/CLSoftmaxLayer.cpp @@ -24,24 +24,38 @@ #include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h" #include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/ICLKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLSoftmaxLayerKernel.h" #include "src/core/helpers/SoftmaxHelpers.h" +#include "support/MemorySupport.h" namespace arm_compute { template CLSoftmaxLayerGeneric::CLSoftmaxLayerGeneric(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _permute_input(), _permute_output(), _max_shift_exp_sum_kernel(), _norm_kernel(), _max(), _sum(), _tmp(), _input_permuted(), _output_permuted(), + : _memory_group(std::move(memory_manager)), + _permute_input(), + _permute_output(), + _max_shift_exp_sum_kernel(support::cpp14::make_unique()), + _norm_kernel(support::cpp14::make_unique()), + _max(), + _sum(), + _tmp(), + _input_permuted(), + _output_permuted(), _needs_permute() { } +template +CLSoftmaxLayerGeneric::~CLSoftmaxLayerGeneric() = default; + template void CLSoftmaxLayerGeneric::configure(const ICLTensor *input, ICLTensor *output, float beta, int32_t axis) { @@ -78,7 +92,7 @@ void CLSoftmaxLayerGeneric::configure(const CLCompileContext &compile_co _sum.allocator()->init(tmp_input->info()->clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type)); // Set GPU target to kernels - 
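The Sobel functions above keep their two-pass structure after the unique_ptr change: the border-fill kernel is enqueued without forcing a flush, then the horizontal pass, and only the final vertical pass flushes the queue. A rough, stand-alone illustration of that ordering; the Scheduler type below is a hypothetical mini-scheduler used only to mirror the flush flag of CLScheduler::get().enqueue(kernel, flush), not the real API:

#include <functional>
#include <iostream>
#include <vector>

// Hypothetical mini-scheduler: it batches jobs and only submits them when asked
// to flush, a loose analogy for the flush flag on the real enqueue call.
struct Scheduler
{
    void enqueue(std::function<void()> job, bool flush = true)
    {
        _pending.push_back(std::move(job));
        if(flush)
        {
            for(auto &j : _pending) { j(); }
            _pending.clear();
        }
    }
    std::vector<std::function<void()>> _pending;
};

int main()
{
    Scheduler sched;
    auto fill_border = [] { std::cout << "fill border\n"; };
    auto sobel_hor   = [] { std::cout << "horizontal pass\n"; };
    auto sobel_vert  = [] { std::cout << "vertical pass\n"; };

    // Same ordering as CLSobel5x5::run() / CLSobel7x7::run():
    sched.enqueue(fill_border, false); // no flush yet
    sched.enqueue(sobel_hor, false);   // still batching
    sched.enqueue(sobel_vert);         // last kernel flushes the batch
    return 0;
}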
_max_shift_exp_sum_kernel.set_target(CLScheduler::get().target()); + _max_shift_exp_sum_kernel->set_target(CLScheduler::get().target()); // Manage intermediate buffers _memory_group.manage(&_tmp); @@ -91,8 +105,8 @@ void CLSoftmaxLayerGeneric::configure(const CLCompileContext &compile_co softmax_info.input_data_type = tmp_input->info()->data_type(); // Configure kernels - _max_shift_exp_sum_kernel.configure(compile_context, tmp_input, &_max, &_tmp, &_sum, softmax_info); - _norm_kernel.configure(compile_context, &_tmp, &_sum, tmp_output, softmax_info); + _max_shift_exp_sum_kernel->configure(compile_context, tmp_input, &_max, &_tmp, &_sum, softmax_info); + _norm_kernel->configure(compile_context, &_tmp, &_sum, tmp_output, softmax_info); // Allocate intermediate buffers _tmp.allocator()->allocate(); @@ -156,8 +170,8 @@ void CLSoftmaxLayerGeneric::run() _permute_input.run(); } - CLScheduler::get().enqueue(_max_shift_exp_sum_kernel, false); - CLScheduler::get().enqueue(_norm_kernel, !_needs_permute); + CLScheduler::get().enqueue(*_max_shift_exp_sum_kernel, false); + CLScheduler::get().enqueue(*_norm_kernel, !_needs_permute); if(_needs_permute) { diff --git a/src/runtime/CL/functions/CLSpaceToBatchLayer.cpp b/src/runtime/CL/functions/CLSpaceToBatchLayer.cpp index eea3cb535f..e83def5677 100644 --- a/src/runtime/CL/functions/CLSpaceToBatchLayer.cpp +++ b/src/runtime/CL/functions/CLSpaceToBatchLayer.cpp @@ -29,14 +29,21 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLMemsetKernel.h" +#include "src/core/CL/kernels/CLSpaceToBatchLayerKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { CLSpaceToBatchLayer::CLSpaceToBatchLayer() - : _space_to_batch_kernel(), _memset_kernel(), _has_padding(false) + : _space_to_batch_kernel(support::cpp14::make_unique()), + _memset_kernel(support::cpp14::make_unique()), + _has_padding(false) { } +CLSpaceToBatchLayer::~CLSpaceToBatchLayer() = default; + void CLSpaceToBatchLayer::configure(const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output) { configure(CLKernelLibrary::get().get_compile_context(), input, block_shape, paddings, output); @@ -49,9 +56,9 @@ void CLSpaceToBatchLayer::configure(const CLCompileContext &compile_context, con if(input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size()) { _has_padding = true; - _memset_kernel.configure(compile_context, output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info())); + _memset_kernel->configure(compile_context, output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info())); } - _space_to_batch_kernel.configure(compile_context, input, block_shape, paddings, output); + _space_to_batch_kernel->configure(compile_context, input, block_shape, paddings, output); } void CLSpaceToBatchLayer::configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output) @@ -67,9 +74,9 @@ void CLSpaceToBatchLayer::configure(const CLCompileContext &compile_context, con if(input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size()) { _has_padding = true; - _memset_kernel.configure(compile_context, output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info())); + _memset_kernel->configure(compile_context, output, PixelValue(0, 
input->info()->data_type(), input->info()->quantization_info())); } - _space_to_batch_kernel.configure(compile_context, input, block_shape_x, block_shape_y, padding_left, padding_right, output); + _space_to_batch_kernel->configure(compile_context, input, block_shape_x, block_shape_y, padding_left, padding_right, output); } Status CLSpaceToBatchLayer::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output) @@ -94,8 +101,8 @@ void CLSpaceToBatchLayer::run() // Zero out output only if we have paddings if(_has_padding) { - CLScheduler::get().enqueue(_memset_kernel, true); + CLScheduler::get().enqueue(*_memset_kernel, true); } - CLScheduler::get().enqueue(_space_to_batch_kernel, true); + CLScheduler::get().enqueue(*_space_to_batch_kernel, true); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLSpaceToDepthLayer.cpp b/src/runtime/CL/functions/CLSpaceToDepthLayer.cpp index 06aa92d6fa..db8c4953cc 100644 --- a/src/runtime/CL/functions/CLSpaceToDepthLayer.cpp +++ b/src/runtime/CL/functions/CLSpaceToDepthLayer.cpp @@ -29,14 +29,18 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLSpaceToDepthLayerKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { CLSpaceToDepthLayer::CLSpaceToDepthLayer() - : _space_to_depth_kernel() + : _space_to_depth_kernel(support::cpp14::make_unique()) { } +CLSpaceToDepthLayer::~CLSpaceToDepthLayer() = default; + void CLSpaceToDepthLayer::configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape) { configure(CLKernelLibrary::get().get_compile_context(), input, output, block_shape); @@ -44,7 +48,7 @@ void CLSpaceToDepthLayer::configure(const ICLTensor *input, ICLTensor *output, i void CLSpaceToDepthLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape) { - _space_to_depth_kernel.configure(compile_context, input, output, block_shape); + _space_to_depth_kernel->configure(compile_context, input, output, block_shape); } Status CLSpaceToDepthLayer::validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape) @@ -54,6 +58,6 @@ Status CLSpaceToDepthLayer::validate(const ITensorInfo *input, const ITensorInfo void CLSpaceToDepthLayer::run() { - CLScheduler::get().enqueue(_space_to_depth_kernel, true); + CLScheduler::get().enqueue(*_space_to_depth_kernel, true); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLStackLayer.cpp b/src/runtime/CL/functions/CLStackLayer.cpp index 39f0ab4779..f4aa78a72d 100644 --- a/src/runtime/CL/functions/CLStackLayer.cpp +++ b/src/runtime/CL/functions/CLStackLayer.cpp @@ -32,6 +32,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLStackLayerKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { @@ -42,6 +44,8 @@ CLStackLayer::CLStackLayer() // NOLINT { } +CLStackLayer::~CLStackLayer() = default; + void CLStackLayer::configure(const std::vector &input, int axis, ICLTensor *output) { configure(CLKernelLibrary::get().get_compile_context(), input, axis, output); @@ -50,14 +54,15 @@ void CLStackLayer::configure(const std::vector &input, int axis, IC void CLStackLayer::configure(const CLCompileContext &compile_context, const std::vector &input, int axis, ICLTensor *output) { _num_inputs = 
input.size(); - _stack_kernels.resize(_num_inputs); + _stack_kernels.reserve(_num_inputs); // Wrap around negative values const unsigned int axis_u = wrap_around(axis, static_cast(input[0]->info()->num_dimensions() + 1)); for(unsigned int i = 0; i < _num_inputs; i++) { - _stack_kernels[i].configure(compile_context, input[i], axis_u, i, _num_inputs, output); + _stack_kernels.emplace_back(support::cpp14::make_unique()); + _stack_kernels.back()->configure(compile_context, input[i], axis_u, i, _num_inputs, output); } } @@ -87,7 +92,7 @@ void CLStackLayer::run() { for(unsigned i = 0; i < _num_inputs; i++) { - CLScheduler::get().enqueue(_stack_kernels[i], false); + CLScheduler::get().enqueue(*_stack_kernels[i], false); } } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLStridedSlice.cpp b/src/runtime/CL/functions/CLStridedSlice.cpp index b78073dd67..3f6814f5ce 100644 --- a/src/runtime/CL/functions/CLStridedSlice.cpp +++ b/src/runtime/CL/functions/CLStridedSlice.cpp @@ -24,8 +24,8 @@ #include "arm_compute/runtime/CL/functions/CLStridedSlice.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h" #include "arm_compute/core/Types.h" +#include "src/core/CL/kernels/CLStridedSliceKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/CL/functions/CLTableLookup.cpp b/src/runtime/CL/functions/CLTableLookup.cpp index 3d2d1853ca..8282f37e4b 100644 --- a/src/runtime/CL/functions/CLTableLookup.cpp +++ b/src/runtime/CL/functions/CLTableLookup.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLTableLookup.h" -#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h" +#include "src/core/CL/kernels/CLTableLookupKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLThreshold.cpp b/src/runtime/CL/functions/CLThreshold.cpp index bdbf37e841..250f6f034f 100644 --- a/src/runtime/CL/functions/CLThreshold.cpp +++ b/src/runtime/CL/functions/CLThreshold.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLThreshold.h" -#include "arm_compute/core/CL/kernels/CLThresholdKernel.h" +#include "src/core/CL/kernels/CLThresholdKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLTile.cpp b/src/runtime/CL/functions/CLTile.cpp index 68efad0125..8384e48baf 100644 --- a/src/runtime/CL/functions/CLTile.cpp +++ b/src/runtime/CL/functions/CLTile.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLTile.h" -#include "arm_compute/core/CL/kernels/CLTileKernel.h" +#include "src/core/CL/kernels/CLTileKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/CL/functions/CLTranspose.cpp b/src/runtime/CL/functions/CLTranspose.cpp index 8cade66a90..43fa7a012a 100644 --- a/src/runtime/CL/functions/CLTranspose.cpp +++ b/src/runtime/CL/functions/CLTranspose.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLTranspose.h" -#include "arm_compute/core/CL/kernels/CLTransposeKernel.h" +#include "src/core/CL/kernels/CLTransposeKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLUpsampleLayer.cpp b/src/runtime/CL/functions/CLUpsampleLayer.cpp index e9456c100b..10b4b76a5e 100644 --- a/src/runtime/CL/functions/CLUpsampleLayer.cpp +++ b/src/runtime/CL/functions/CLUpsampleLayer.cpp @@ -26,15 +26,19 @@ #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Utils.h" #include "arm_compute/runtime/CL/CLScheduler.h" 
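Each runtime .cpp in this patch gains includes of the form src/core/CL/kernels/...h plus support/MemorySupport.h, while the public arm_compute/core/CL/kernels/ includes disappear; the public headers stay compilable by forward-declaring the kernel and defining the destructor out of line where the kernel type is complete. A sketch of that header/source split with placeholder names (MyLayer/MyKernel are not real Compute Library classes):

#include <memory>

namespace sketch // placeholder namespace, not the real library
{
// --- What the public function header keeps ---
class MyKernel; // forward declaration only: the kernel header is now internal

class MyLayer
{
public:
    MyLayer();
    ~MyLayer(); // declared here, defined where MyKernel is a complete type
    void configure(int param);
    void run();

private:
    std::unique_ptr<MyKernel> _kernel;
};

// --- What would live in the .cpp, next to an include of the internal kernel header ---
class MyKernel
{
public:
    void configure(int param) { _param = param; }
    void run() { /* enqueue OpenCL work */ }

private:
    int _param{ 0 };
};

MyLayer::MyLayer() : _kernel(std::make_unique<MyKernel>()) {}
MyLayer::~MyLayer() = default; // unique_ptr<MyKernel> is destroyed with a complete type in view
void MyLayer::configure(int param) { _kernel->configure(param); }
void MyLayer::run() { _kernel->run(); }
} // namespace sketch

int main()
{
    sketch::MyLayer layer;
    layer.configure(3);
    layer.run();
    return 0;
}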
+#include "src/core/CL/kernels/CLUpsampleLayerKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { CLUpsampleLayer::CLUpsampleLayer() // NOLINT - : _upsample(), + : _upsample(support::cpp14::make_unique()), _output(nullptr) { } +CLUpsampleLayer::~CLUpsampleLayer() = default; + Status CLUpsampleLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info, const InterpolationPolicy upsampling_policy) { @@ -53,11 +57,11 @@ void CLUpsampleLayer::configure(const CLCompileContext &compile_context, ICLTens ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); _output = output; - _upsample.configure(compile_context, input, _output, info, upsampling_policy); + _upsample->configure(compile_context, input, _output, info, upsampling_policy); } void CLUpsampleLayer::run() { - CLScheduler::get().enqueue(_upsample, false); + CLScheduler::get().enqueue(*_upsample, false); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLWarpAffine.cpp b/src/runtime/CL/functions/CLWarpAffine.cpp index fffc58c8d0..86e5a7bd86 100644 --- a/src/runtime/CL/functions/CLWarpAffine.cpp +++ b/src/runtime/CL/functions/CLWarpAffine.cpp @@ -23,8 +23,9 @@ */ #include "arm_compute/runtime/CL/functions/CLWarpAffine.h" -#include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLWarpAffineKernel.h" #include "support/MemorySupport.h" #include @@ -42,5 +43,5 @@ void CLWarpAffine::configure(const CLCompileContext &compile_context, ICLTensor auto k = arm_compute::support::cpp14::make_unique(); k->configure(compile_context, input, output, matrix, policy); _kernel = std::move(k); - _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); } diff --git a/src/runtime/CL/functions/CLWarpPerspective.cpp b/src/runtime/CL/functions/CLWarpPerspective.cpp index 2b4b187e38..7e8bc5cdff 100644 --- a/src/runtime/CL/functions/CLWarpPerspective.cpp +++ b/src/runtime/CL/functions/CLWarpPerspective.cpp @@ -23,8 +23,9 @@ */ #include "arm_compute/runtime/CL/functions/CLWarpPerspective.h" -#include "arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLWarpPerspectiveKernel.h" #include "support/MemorySupport.h" #include @@ -42,5 +43,5 @@ void CLWarpPerspective::configure(const CLCompileContext &compile_context, ICLTe auto k = arm_compute::support::cpp14::make_unique(); k->configure(compile_context, input, output, matrix, policy); _kernel = std::move(k); - _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); } diff --git a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp index 7ad017f918..7af42904e8 100644 --- a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp @@ -28,6 +28,15 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include 
"src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" +#include "src/core/CL/kernels/CLWinogradFilterTransformKernel.h" +#include "src/core/CL/kernels/CLWinogradOutputTransformKernel.h" +#include "support/MemorySupport.h" using namespace arm_compute; @@ -90,11 +99,13 @@ bool check_support_fast_math(const Size2D &output_tile, const Size2D &kernel_siz } // namespace CLWinogradConvolutionLayer::CLWinogradConvolutionLayer(std::shared_ptr memory_manager) - : _memory_group(memory_manager), _batched_mm(memory_manager), _input_transform(), _filter_transform(), _output_transform(), _input0(), _input1(), _batched_mm_output(), _original_weights(nullptr), - _is_prepared(false) + : _memory_group(memory_manager), _batched_mm(memory_manager), _input_transform(), _filter_transform(support::cpp14::make_unique()), + _output_transform(support::cpp14::make_unique()), _input0(), _input1(), _batched_mm_output(), _original_weights(nullptr), _is_prepared(false) { } +CLWinogradConvolutionLayer::~CLWinogradConvolutionLayer() = default; + void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, bool enable_fast_math) { @@ -139,7 +150,7 @@ void CLWinogradConvolutionLayer::configure(const CLCompileContext &compile_conte _input_transform.configure(compile_context, input, &_input0, winograd_info); // Configure filter transform - _filter_transform.configure(compile_context, weights, &_input1, winograd_info); + _filter_transform->configure(compile_context, weights, &_input1, winograd_info); // Configure batched matrix multiply _batched_mm.configure(compile_context, &_input0, &_input1, nullptr, &_batched_mm_output, 1.0f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run*/, 0, false, false, @@ -147,7 +158,7 @@ void CLWinogradConvolutionLayer::configure(const CLCompileContext &compile_conte (input->info()->data_type() == DataType::F16))); // Configure output transform - _output_transform.configure(compile_context, &_batched_mm_output, biases, output, winograd_info, act_info); + _output_transform->configure(compile_context, &_batched_mm_output, biases, output, winograd_info, act_info); // Allocate temporary tensors _input0.allocator()->allocate(); @@ -218,7 +229,7 @@ void CLWinogradConvolutionLayer::run() _batched_mm.run(); // Run output transform - CLScheduler::get().enqueue(_output_transform); + CLScheduler::get().enqueue(*_output_transform); } void CLWinogradConvolutionLayer::prepare() @@ -227,7 +238,7 @@ void CLWinogradConvolutionLayer::prepare() { // Run filter transform and mark original weights as unused _input1.allocator()->allocate(); - CLScheduler::get().enqueue(_filter_transform, false); + CLScheduler::get().enqueue(*_filter_transform, false); _original_weights->mark_as_unused(); // Prepare GEMM and release reshaped weights if marked unused by CLGEMM diff --git a/src/runtime/CL/functions/CLWinogradInputTransform.cpp b/src/runtime/CL/functions/CLWinogradInputTransform.cpp index 9498206549..308c41f714 100644 --- a/src/runtime/CL/functions/CLWinogradInputTransform.cpp +++ b/src/runtime/CL/functions/CLWinogradInputTransform.cpp @@ -24,8 
+24,9 @@ #include "arm_compute/runtime/CL/functions/CLWinogradInputTransform.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h" #include "arm_compute/core/Error.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLWinogradInputTransformKernel.h" #include "support/MemorySupport.h" using namespace arm_compute; @@ -40,7 +41,7 @@ void CLWinogradInputTransform::configure(const CLCompileContext &compile_context auto k = arm_compute::support::cpp14::make_unique(); k->configure(compile_context, input, output, winograd_info); _kernel = std::move(k); - _border_handler.configure(compile_context, input, _kernel->border_size(), BorderMode::CONSTANT, PixelValue()); + _border_handler->configure(compile_context, input, _kernel->border_size(), BorderMode::CONSTANT, PixelValue()); } Status CLWinogradInputTransform::validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info) diff --git a/src/runtime/CL/functions/CLYOLOLayer.cpp b/src/runtime/CL/functions/CLYOLOLayer.cpp index d553f97009..46bf220b0c 100644 --- a/src/runtime/CL/functions/CLYOLOLayer.cpp +++ b/src/runtime/CL/functions/CLYOLOLayer.cpp @@ -23,8 +23,8 @@ */ #include "arm_compute/runtime/CL/functions/CLYOLOLayer.h" -#include "arm_compute/core/CL/kernels/CLYOLOLayerKernel.h" #include "arm_compute/core/Types.h" +#include "src/core/CL/kernels/CLYOLOLayerKernel.h" #include "support/MemorySupport.h" using namespace arm_compute; diff --git a/src/runtime/CL/tuners/BifrostTuner.cpp b/src/runtime/CL/tuners/BifrostTuner.cpp index a6474c9835..9490e0b219 100644 --- a/src/runtime/CL/tuners/BifrostTuner.cpp +++ b/src/runtime/CL/tuners/BifrostTuner.cpp @@ -24,7 +24,7 @@ #include "arm_compute/runtime/CL/tuners/BifrostTuner.h" #include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernels.h" +#include "src/core/CL/CLKernels.h" #include "support/Cast.h" namespace arm_compute diff --git a/src/runtime/CL/tuners/MidgardTuner.cpp b/src/runtime/CL/tuners/MidgardTuner.cpp index 58b0d579d2..72734f2207 100644 --- a/src/runtime/CL/tuners/MidgardTuner.cpp +++ b/src/runtime/CL/tuners/MidgardTuner.cpp @@ -24,7 +24,7 @@ #include "arm_compute/runtime/CL/tuners/MidgardTuner.h" #include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernels.h" +#include "src/core/CL/CLKernels.h" #include "support/Cast.h" namespace arm_compute diff --git a/tests/CL/Helper.h b/tests/CL/Helper.h index e0d584c5ce..e548af4938 100644 --- a/tests/CL/Helper.h +++ b/tests/CL/Helper.h @@ -24,13 +24,15 @@ #ifndef ARM_COMPUTE_TEST_CL_HELPER_H #define ARM_COMPUTE_TEST_CL_HELPER_H -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" -#include "arm_compute/core/CL/kernels/CLMemsetKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLMemsetKernel.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" #include "arm_compute/runtime/IFunction.h" + +#include "src/core/CL/ICLKernel.h" + #include "support/MemorySupport.h" namespace arm_compute @@ -93,7 +95,7 @@ public: auto k = arm_compute::support::cpp14::make_unique(); k->configure(first, std::forward(args)...); _kernel = std::move(k); - _border_handler.configure(first, BorderSize(bordersize), BorderMode::CONSTANT, PixelValue()); + _border_handler->configure(first, BorderSize(bordersize), BorderMode::CONSTANT, PixelValue()); } }; diff --git 
a/tests/benchmark/CL/Scale.cpp b/tests/benchmark/CL/Scale.cpp index 58727edcae..8a1ceb663e 100644 --- a/tests/benchmark/CL/Scale.cpp +++ b/tests/benchmark/CL/Scale.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,7 @@ #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "arm_compute/runtime/CL/functions/CLScale.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/benchmark/fixtures/ScaleFixture.h" #include "tests/datasets/BorderModeDataset.h" diff --git a/tests/validate_examples/cl_gemm.cpp b/tests/validate_examples/cl_gemm.cpp index 0e71f9d5bf..99f7513624 100644 --- a/tests/validate_examples/cl_gemm.cpp +++ b/tests/validate_examples/cl_gemm.cpp @@ -28,9 +28,24 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" -#include "arm_compute/runtime/CL/CLFunctions.h" #include "arm_compute/runtime/CL/CLScheduler.h" - +#include "arm_compute/runtime/CL/functions/CLGEMM.h" +#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h" +#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h" +#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" +#include "src/core/CL/kernels/CLIm2ColKernel.h" +#include "src/core/CL/kernels/CLWeightsReshapeKernel.h" #include "tests/AssetsLibrary.h" #include "tests/CL/CLAccessor.h" #include "tests/Globals.h" diff --git a/tests/validation/CL/ArgMinMax.cpp b/tests/validation/CL/ArgMinMax.cpp index 7dcd22e795..2508c63524 100644 --- a/tests/validation/CL/ArgMinMax.cpp +++ b/tests/validation/CL/ArgMinMax.cpp @@ -22,12 +22,11 @@ * SOFTWARE. 
*/ #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h" #include "arm_compute/runtime/CL/functions/CLReductionOperation.h" - -#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "tests/CL/CLAccessor.h" #include "tests/datasets/ShapeDatasets.h" #include "tests/datasets/SplitDataset.h" @@ -54,7 +53,8 @@ const auto ArgMinMaxSmallDataset = framework::dataset::make("Shape", TensorShape{ 2560, 2U, 2U, 2U }, }); -const auto ArgMinMaxLargeDataset = framework::dataset::make("Shape", { TensorShape{ 517U, 123U, 13U, 2U } }); +const auto ArgMinMaxLargeDataset = framework::dataset::make("Shape", +{ TensorShape{ 517U, 123U, 13U, 2U } }); } // namespace TEST_SUITE(CL) TEST_SUITE(ArgMinMax) diff --git a/tests/validation/CL/BatchNormalizationLayer.cpp b/tests/validation/CL/BatchNormalizationLayer.cpp index e67f4cc199..88f00b0eff 100644 --- a/tests/validation/CL/BatchNormalizationLayer.cpp +++ b/tests/validation/CL/BatchNormalizationLayer.cpp @@ -58,10 +58,11 @@ const auto act_infos = framework::dataset::make("Activat ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f), }); -const auto common_fusion_dataset = combine(combine(combine(framework::dataset::make("UseBias", { false, true }), - framework::dataset::make("UseBeta", { false, true })), - framework::dataset::make("UseGamma", { false, true })), - framework::dataset::make("Epsilon", { 0.001f })); +const auto common_fusion_dataset = combine(combine(combine(framework::dataset::make("UseBias", +{ false, true }), +framework::dataset::make("UseBeta", { false, true })), +framework::dataset::make("UseGamma", { false, true })), +framework::dataset::make("Epsilon", { 0.001f })); bool validate_zero_padding(TensorShape shape0, const TensorShape shape1, float epsilon, ActivationLayerInfo act_info, DataType dt, DataLayout data_layout) { @@ -141,9 +142,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( // clang-format on // *INDENT-ON* -DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallRandomBatchNormalizationLayerDataset(), - act_infos), - framework::dataset::make("DataType", { DataType::F32, DataType::F16 })), +DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallRandomBatchNormalizationLayerDataset(), act_infos), framework::dataset::make("DataType", { DataType::F32, DataType::F16 })), framework::dataset::make("DataLayout", { DataLayout::NHWC })), shape0, shape1, episilon, act_infos, data_type, data_layout) { @@ -154,8 +153,7 @@ DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, combine(combine TEST_SUITE(Float) TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallRandomBatchNormalizationLayerDataset(), - combine(framework::dataset::make("UseBeta", { false, true }), - framework::dataset::make("UseGamma", { false, true }))), + combine(framework::dataset::make("UseBeta", { false, true }), framework::dataset::make("UseGamma", { false, true }))), act_infos), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) @@ -167,9 +165,9 @@ TEST_SUITE_END() //FP32 TEST_SUITE(FP16) 
FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallRandomBatchNormalizationLayerDataset(), - combine(framework::dataset::make("UseBeta", { false, true }), - framework::dataset::make("UseGamma", { false, true }))), - framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f))), + combine(framework::dataset::make("UseBeta", { false, true }), framework::dataset::make("UseGamma", { false, true }))), + framework::dataset::make("ActivationInfo", + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f))), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { diff --git a/tests/validation/CL/BatchToSpaceLayer.cpp b/tests/validation/CL/BatchToSpaceLayer.cpp index f553787729..e90ac921c5 100644 --- a/tests/validation/CL/BatchToSpaceLayer.cpp +++ b/tests/validation/CL/BatchToSpaceLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/BoundingBoxTransform.cpp b/tests/validation/CL/BoundingBoxTransform.cpp index 82dfa31606..2a7f1667d6 100644 --- a/tests/validation/CL/BoundingBoxTransform.cpp +++ b/tests/validation/CL/BoundingBoxTransform.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/CannyEdge.cpp b/tests/validation/CL/CannyEdge.cpp index 84c73c1a8c..10da5f4363 100644 --- a/tests/validation/CL/CannyEdge.cpp +++ b/tests/validation/CL/CannyEdge.cpp @@ -48,8 +48,9 @@ namespace /* Allowed ratio of mismatches between target and reference (1.0 = 100%) */ const float allowed_mismatch_ratio = 0.1f; -const auto data = combine(framework::dataset::make("GradientSize", { 3, 5, 7 }), - combine(framework::dataset::make("Normalization", { MagnitudeType::L1NORM, MagnitudeType::L2NORM }), datasets::BorderModes())); +const auto data = combine(framework::dataset::make("GradientSize", +{ 3, 5, 7 }), +combine(framework::dataset::make("Normalization", { MagnitudeType::L1NORM, MagnitudeType::L2NORM }), datasets::BorderModes())); } // namespace TEST_SUITE(CL) diff --git a/tests/validation/CL/ChannelCombine.cpp b/tests/validation/CL/ChannelCombine.cpp index 6187e72960..7ef8414d7e 100644 --- a/tests/validation/CL/ChannelCombine.cpp +++ b/tests/validation/CL/ChannelCombine.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/ChannelExtract.cpp b/tests/validation/CL/ChannelExtract.cpp index 7657d5a7ea..7a0dcf3e7d 100644 --- a/tests/validation/CL/ChannelExtract.cpp +++ b/tests/validation/CL/ChannelExtract.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/Col2Im.cpp b/tests/validation/CL/Col2Im.cpp index d6ef010b53..b651bf8918 100644 --- a/tests/validation/CL/Col2Im.cpp +++ b/tests/validation/CL/Col2Im.cpp @@ -21,9 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h" #include "arm_compute/core/Types.h" - +#include "src/core/CL/kernels/CLCol2ImKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/framework/Asserts.h" diff --git a/tests/validation/CL/ConvertFullyConnectedWeights.cpp b/tests/validation/CL/ConvertFullyConnectedWeights.cpp index a5065fb217..70d7b2c767 100644 --- a/tests/validation/CL/ConvertFullyConnectedWeights.cpp +++ b/tests/validation/CL/ConvertFullyConnectedWeights.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/CropResize.cpp b/tests/validation/CL/CropResize.cpp index 636db1728f..f1fae3d5cc 100644 --- a/tests/validation/CL/CropResize.cpp +++ b/tests/validation/CL/CropResize.cpp @@ -25,7 +25,6 @@ #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "arm_compute/runtime/CL/functions/CLCropResize.h" - #include "tests/CL/CLAccessor.h" #include "tests/datasets/CropResizeDataset.h" #include "tests/framework/Asserts.h" diff --git a/tests/validation/CL/DeconvolutionLayer.cpp b/tests/validation/CL/DeconvolutionLayer.cpp index c677f5ae96..c284cdcee3 100644 --- a/tests/validation/CL/DeconvolutionLayer.cpp +++ b/tests/validation/CL/DeconvolutionLayer.cpp @@ -21,7 +21,6 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" diff --git a/tests/validation/CL/DepthToSpaceLayer.cpp b/tests/validation/CL/DepthToSpaceLayer.cpp index fd570ad753..7cee4b7129 100644 --- a/tests/validation/CL/DepthToSpaceLayer.cpp +++ b/tests/validation/CL/DepthToSpaceLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp b/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp index 058d9b3ecc..b1cd379574 100644 --- a/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp +++ b/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp @@ -21,12 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/PaddingCalculator.h" diff --git a/tests/validation/CL/Fill.cpp b/tests/validation/CL/Fill.cpp index b86dae10fd..38950079da 100644 --- a/tests/validation/CL/Fill.cpp +++ b/tests/validation/CL/Fill.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/FillBorder.cpp b/tests/validation/CL/FillBorder.cpp index e0b283b56b..e2afd6494e 100644 --- a/tests/validation/CL/FillBorder.cpp +++ b/tests/validation/CL/FillBorder.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Arm Limited. 
+ * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/Globals.h" #include "tests/datasets/BorderModeDataset.h" diff --git a/tests/validation/CL/Flatten.cpp b/tests/validation/CL/Flatten.cpp index a00041b0a4..04f720f7e5 100644 --- a/tests/validation/CL/Flatten.cpp +++ b/tests/validation/CL/Flatten.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/Floor.cpp b/tests/validation/CL/Floor.cpp index 58645b9d85..2961cfa3f2 100644 --- a/tests/validation/CL/Floor.cpp +++ b/tests/validation/CL/Floor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/FuseBatchNormalization.cpp b/tests/validation/CL/FuseBatchNormalization.cpp index 0736250727..548feab2ed 100644 --- a/tests/validation/CL/FuseBatchNormalization.cpp +++ b/tests/validation/CL/FuseBatchNormalization.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/GEMM.cpp b/tests/validation/CL/GEMM.cpp index c9540c352a..392eeb1510 100644 --- a/tests/validation/CL/GEMM.cpp +++ b/tests/validation/CL/GEMM.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/GEMMLowpMatrixMultiplyNative.cpp b/tests/validation/CL/GEMMLowpMatrixMultiplyNative.cpp index ce000bd8e1..1cfeac59af 100644 --- a/tests/validation/CL/GEMMLowpMatrixMultiplyNative.cpp +++ b/tests/validation/CL/GEMMLowpMatrixMultiplyNative.cpp @@ -21,9 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/framework/Asserts.h" diff --git a/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp b/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp index 16e4a137eb..0c651cddc2 100644 --- a/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp +++ b/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp @@ -21,11 +21,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/framework/Asserts.h" diff --git a/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp b/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp index d8618bd881..fa256280ca 100644 --- a/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp +++ b/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp @@ -21,12 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/PaddingCalculator.h" diff --git a/tests/validation/CL/GEMMMatrixMultiply.cpp b/tests/validation/CL/GEMMMatrixMultiply.cpp index e521dd5a02..5d2e211d91 100644 --- a/tests/validation/CL/GEMMMatrixMultiply.cpp +++ b/tests/validation/CL/GEMMMatrixMultiply.cpp @@ -21,12 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/PaddingCalculator.h" diff --git a/tests/validation/CL/GEMMMatrixMultiplyInterleavedTransposed.cpp b/tests/validation/CL/GEMMMatrixMultiplyInterleavedTransposed.cpp index fcbf8ce110..b2701e7f6c 100644 --- a/tests/validation/CL/GEMMMatrixMultiplyInterleavedTransposed.cpp +++ b/tests/validation/CL/GEMMMatrixMultiplyInterleavedTransposed.cpp @@ -21,14 +21,14 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/PaddingCalculator.h" diff --git a/tests/validation/CL/GEMMMatrixMultiplyNative.cpp b/tests/validation/CL/GEMMMatrixMultiplyNative.cpp index 6ba5012d15..1cf1209dee 100644 --- a/tests/validation/CL/GEMMMatrixMultiplyNative.cpp +++ b/tests/validation/CL/GEMMMatrixMultiplyNative.cpp @@ -21,12 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/PaddingCalculator.h" diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp index 5629a80f8e..0a0a1fc397 100644 --- a/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp +++ b/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp @@ -21,14 +21,14 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/PaddingCalculator.h" diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp index 33912ae2ba..789b77377d 100644 --- a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp +++ b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp @@ -21,13 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/PaddingCalculator.h" diff --git a/tests/validation/CL/GEMMReshapeLHSMatrix.cpp b/tests/validation/CL/GEMMReshapeLHSMatrix.cpp index d9439f63f1..4af495944e 100644 --- a/tests/validation/CL/GEMMReshapeLHSMatrix.cpp +++ b/tests/validation/CL/GEMMReshapeLHSMatrix.cpp @@ -21,11 +21,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/PaddingCalculator.h" diff --git a/tests/validation/CL/GEMMReshapeRHSMatrix.cpp b/tests/validation/CL/GEMMReshapeRHSMatrix.cpp index 579ed32afe..14048e81ec 100644 --- a/tests/validation/CL/GEMMReshapeRHSMatrix.cpp +++ b/tests/validation/CL/GEMMReshapeRHSMatrix.cpp @@ -21,11 +21,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/PaddingCalculator.h" diff --git a/tests/validation/CL/Gather.cpp b/tests/validation/CL/Gather.cpp index 6126231d0d..f0b87d7d9f 100644 --- a/tests/validation/CL/Gather.cpp +++ b/tests/validation/CL/Gather.cpp @@ -25,7 +25,6 @@ #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "arm_compute/runtime/CL/functions/CLGather.h" - #include "tests/CL/CLAccessor.h" #include "tests/datasets/GatherDataset.h" #include "tests/framework/Asserts.h" diff --git a/tests/validation/CL/GlobalPoolingLayer.cpp b/tests/validation/CL/GlobalPoolingLayer.cpp index 5328fc8448..246368e66d 100644 --- a/tests/validation/CL/GlobalPoolingLayer.cpp +++ b/tests/validation/CL/GlobalPoolingLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/HOGDescriptor.cpp b/tests/validation/CL/HOGDescriptor.cpp index 7c014b5d22..c6b2763dfd 100644 --- a/tests/validation/CL/HOGDescriptor.cpp +++ b/tests/validation/CL/HOGDescriptor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/HOGDetector.cpp b/tests/validation/CL/HOGDetector.cpp index 78edf0fd27..9f74c728cf 100644 --- a/tests/validation/CL/HOGDetector.cpp +++ b/tests/validation/CL/HOGDetector.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/HOGMultiDetection.cpp b/tests/validation/CL/HOGMultiDetection.cpp index 091ff9e9db..5557fde33c 100644 --- a/tests/validation/CL/HOGMultiDetection.cpp +++ b/tests/validation/CL/HOGMultiDetection.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/Im2Col.cpp b/tests/validation/CL/Im2Col.cpp index 12b082fe13..e7e46b7bc5 100644 --- a/tests/validation/CL/Im2Col.cpp +++ b/tests/validation/CL/Im2Col.cpp @@ -21,9 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h" #include "arm_compute/core/Types.h" - +#include "src/core/CL/kernels/CLIm2ColKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/framework/Asserts.h" diff --git a/tests/validation/CL/InstanceNormalizationLayer.cpp b/tests/validation/CL/InstanceNormalizationLayer.cpp index a30e3260c6..a52ebc5bfe 100644 --- a/tests/validation/CL/InstanceNormalizationLayer.cpp +++ b/tests/validation/CL/InstanceNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/L2NormalizeLayer.cpp b/tests/validation/CL/L2NormalizeLayer.cpp index 9502df5ade..bcf68a526c 100644 --- a/tests/validation/CL/L2NormalizeLayer.cpp +++ b/tests/validation/CL/L2NormalizeLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/LSTMLayerQuantized.cpp b/tests/validation/CL/LSTMLayerQuantized.cpp index f975bfb196..fe533ee914 100644 --- a/tests/validation/CL/LSTMLayerQuantized.cpp +++ b/tests/validation/CL/LSTMLayerQuantized.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,7 +22,6 @@ * SOFTWARE. */ #include "arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h" - #include "tests/CL/CLAccessor.h" #include "tests/PaddingCalculator.h" #include "tests/Utils.h" diff --git a/tests/validation/CL/LogSoftmaxLayer.cpp b/tests/validation/CL/LogSoftmaxLayer.cpp index 8fdc745d13..b7f6a66e42 100644 --- a/tests/validation/CL/LogSoftmaxLayer.cpp +++ b/tests/validation/CL/LogSoftmaxLayer.cpp @@ -21,7 +21,6 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" diff --git a/tests/validation/CL/MeanStdDevNormalizationLayer.cpp b/tests/validation/CL/MeanStdDevNormalizationLayer.cpp index a355f9eb1c..e77a21ed7f 100644 --- a/tests/validation/CL/MeanStdDevNormalizationLayer.cpp +++ b/tests/validation/CL/MeanStdDevNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/NormalizationLayer.cpp b/tests/validation/CL/NormalizationLayer.cpp index 88949806d5..1aed2786ff 100644 --- a/tests/validation/CL/NormalizationLayer.cpp +++ b/tests/validation/CL/NormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/OpticalFlow.cpp b/tests/validation/CL/OpticalFlow.cpp index cf60038d4b..7c1ff5ed57 100644 --- a/tests/validation/CL/OpticalFlow.cpp +++ b/tests/validation/CL/OpticalFlow.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/PriorBoxLayer.cpp b/tests/validation/CL/PriorBoxLayer.cpp index c63b093844..780f4796fa 100644 --- a/tests/validation/CL/PriorBoxLayer.cpp +++ b/tests/validation/CL/PriorBoxLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/QLSTMLayerNormalization.cpp b/tests/validation/CL/QLSTMLayerNormalization.cpp index a927be17bb..1c7dee4612 100644 --- a/tests/validation/CL/QLSTMLayerNormalization.cpp +++ b/tests/validation/CL/QLSTMLayerNormalization.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h" +#include "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/PaddingCalculator.h" diff --git a/tests/validation/CL/RNNLayer.cpp b/tests/validation/CL/RNNLayer.cpp index 4e67868943..23219bd7b0 100644 --- a/tests/validation/CL/RNNLayer.cpp +++ b/tests/validation/CL/RNNLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/Range.cpp b/tests/validation/CL/Range.cpp index bf81f55e41..c4e0e17aa0 100644 --- a/tests/validation/CL/Range.cpp +++ b/tests/validation/CL/Range.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/ReduceMean.cpp b/tests/validation/CL/ReduceMean.cpp index 1dc6c615a9..947f84af49 100644 --- a/tests/validation/CL/ReduceMean.cpp +++ b/tests/validation/CL/ReduceMean.cpp @@ -25,7 +25,6 @@ #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "arm_compute/runtime/CL/functions/CLReduceMean.h" - #include "tests/CL/CLAccessor.h" #include "tests/datasets/ShapeDatasets.h" #include "tests/datasets/SplitDataset.h" diff --git a/tests/validation/CL/Reverse.cpp b/tests/validation/CL/Reverse.cpp index ed2c6e337a..11df0e7803 100644 --- a/tests/validation/CL/Reverse.cpp +++ b/tests/validation/CL/Reverse.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/SoftmaxLayer.cpp b/tests/validation/CL/SoftmaxLayer.cpp index 76289cab44..fe31b00e00 100644 --- a/tests/validation/CL/SoftmaxLayer.cpp +++ b/tests/validation/CL/SoftmaxLayer.cpp @@ -21,7 +21,6 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" diff --git a/tests/validation/CL/SpaceToBatchLayer.cpp b/tests/validation/CL/SpaceToBatchLayer.cpp index b2339399a3..971312e379 100644 --- a/tests/validation/CL/SpaceToBatchLayer.cpp +++ b/tests/validation/CL/SpaceToBatchLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/SpaceToDepthLayer.cpp b/tests/validation/CL/SpaceToDepthLayer.cpp index 25b4bcd70c..b9e767fb65 100644 --- a/tests/validation/CL/SpaceToDepthLayer.cpp +++ b/tests/validation/CL/SpaceToDepthLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/TableLookup.cpp b/tests/validation/CL/TableLookup.cpp index 93e313096c..f435c60c13 100644 --- a/tests/validation/CL/TableLookup.cpp +++ b/tests/validation/CL/TableLookup.cpp @@ -26,7 +26,6 @@ #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "arm_compute/runtime/CL/functions/CLTableLookup.h" - #include "tests/CL/CLAccessor.h" #include "tests/CL/CLLutAccessor.h" #include "tests/PaddingCalculator.h" @@ -34,7 +33,6 @@ #include "tests/framework/Asserts.h" #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" - #include "tests/validation/Helpers.h" #include "tests/validation/Validation.h" #include "tests/validation/fixtures/TableLookupFixture.h" diff --git a/tests/validation/CL/Tile.cpp b/tests/validation/CL/Tile.cpp index 73f4aa82a2..a06c05744f 100644 --- a/tests/validation/CL/Tile.cpp +++ b/tests/validation/CL/Tile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/CL/UNIT/DynamicTensor.cpp b/tests/validation/CL/UNIT/DynamicTensor.cpp index b6302846a7..833256039e 100644 --- a/tests/validation/CL/UNIT/DynamicTensor.cpp +++ b/tests/validation/CL/UNIT/DynamicTensor.cpp @@ -28,6 +28,12 @@ #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/MemoryManagerOnDemand.h" #include "arm_compute/runtime/PoolManager.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" +#include "src/core/CL/kernels/CLIm2ColKernel.h" +#include "src/core/CL/kernels/CLL2NormalizeLayerKernel.h" +#include "src/core/CL/kernels/CLReductionOperationKernel.h" +#include "src/core/CL/kernels/CLWeightsReshapeKernel.h" #include "tests/AssetsLibrary.h" #include "tests/CL/CLAccessor.h" #include "tests/Globals.h" diff --git a/tests/validation/CL/UNIT/Tuner.cpp b/tests/validation/CL/UNIT/Tuner.cpp index ee5c76ce5f..cf2513bf2c 100644 --- a/tests/validation/CL/UNIT/Tuner.cpp +++ b/tests/validation/CL/UNIT/Tuner.cpp @@ -21,10 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/tuners/BifrostTuner.h" +#include "src/core/CL/kernels/CLDirectConvolutionLayerKernel.h" #include "tests/Utils.h" #include "tests/framework/Asserts.h" #include "tests/framework/Macros.h" diff --git a/tests/validation/CL/UNIT/WeightsRetention.cpp b/tests/validation/CL/UNIT/WeightsRetention.cpp index 7234e47642..acf795e48b 100644 --- a/tests/validation/CL/UNIT/WeightsRetention.cpp +++ b/tests/validation/CL/UNIT/WeightsRetention.cpp @@ -22,6 +22,18 @@ * SOFTWARE. */ #include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" +#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" +#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" +#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "tests/AssetsLibrary.h" #include "tests/CL/CLAccessor.h" #include "tests/Globals.h" diff --git a/tests/validation/CL/WeightsReshape.cpp b/tests/validation/CL/WeightsReshape.cpp index 3e7ecc3408..d04c10cee2 100644 --- a/tests/validation/CL/WeightsReshape.cpp +++ b/tests/validation/CL/WeightsReshape.cpp @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h" #include "arm_compute/core/Types.h" +#include "src/core/CL/kernels/CLWeightsReshapeKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/datasets/ShapeDatasets.h" diff --git a/tests/validation/CL/Winograd.cpp b/tests/validation/CL/Winograd.cpp index d1522f3e7f..f206e92493 100644 --- a/tests/validation/CL/Winograd.cpp +++ b/tests/validation/CL/Winograd.cpp @@ -21,8 +21,6 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h" -#include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" @@ -30,6 +28,8 @@ #include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h" #include "arm_compute/runtime/CL/functions/CLWinogradInputTransform.h" +#include "src/core/CL/kernels/CLWinogradFilterTransformKernel.h" +#include "src/core/CL/kernels/CLWinogradOutputTransformKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/PaddingCalculator.h" diff --git a/tests/validation/CL/YOLOLayer.cpp b/tests/validation/CL/YOLOLayer.cpp index f28082b74b..95c18d3d95 100644 --- a/tests/validation/CL/YOLOLayer.cpp +++ b/tests/validation/CL/YOLOLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. 
+ * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * -- cgit v1.2.1
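
A minimal, illustrative C++ sketch of the include migration these hunks apply to the CL validation tests (it is not part of the patch itself, and the chosen kernel header is just one example taken from the hunks above): kernel headers move from the public arm_compute include tree to the library-internal src tree, while public API headers keep their existing paths.

// Illustrative sketch only (not part of the patch): the include-path change
// applied across the CL validation test sources in the hunks above.
//
// Before: kernel headers were pulled from the public include tree.
//   #include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
//
// After: the same kernel header is reached through the internal src tree.
#include "src/core/CL/kernels/CLIm2ColKernel.h"
//
// Public API headers are unaffected and keep their arm_compute/ paths.
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"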