From ebcebf1dee7f8314976b1e0cabd62b4cf893d765 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Wed, 21 Oct 2020 00:04:14 +0100 Subject: COMPMID-3638: Move NEON kernels Signed-off-by: Michalis Spyrou Change-Id: Ieed3e4bc8be7fef80c90c5094599b477a56fc473 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4285 Comments-Addressed: Arm Jenkins Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins --- .../CPPBoxWithNonMaximaSuppressionLimitKernel.h | 2 +- .../core/CPP/kernels/CPPCornerCandidatesKernel.h | 4 +- .../CPPDetectionWindowNonMaximaSuppressionKernel.h | 6 +- arm_compute/core/NEON/INEKernel.h | 34 --- arm_compute/core/NEON/INESimpleKernel.h | 34 --- arm_compute/core/NEON/NEKernels.h | 151 ----------- .../core/NEON/kernels/NEAbsoluteDifferenceKernel.h | 86 ------ arm_compute/core/NEON/kernels/NEAccumulateKernel.h | 139 ---------- .../core/NEON/kernels/NEActivationLayerKernel.h | 85 ------ .../core/NEON/kernels/NEArithmeticAdditionKernel.h | 106 -------- .../NEON/kernels/NEArithmeticSubtractionKernel.h | 118 -------- .../NEON/kernels/NEBatchConcatenateLayerKernel.h | 89 ------ .../NEON/kernels/NEBatchNormalizationLayerKernel.h | 139 ---------- .../core/NEON/kernels/NEBatchToSpaceLayerKernel.h | 101 ------- arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h | 72 ----- arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h | 70 ----- arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h | 72 ----- arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h | 72 ----- .../NEON/kernels/NEBoundingBoxTransformKernel.h | 95 ------- arm_compute/core/NEON/kernels/NEBox3x3Kernel.h | 71 ----- arm_compute/core/NEON/kernels/NECannyEdgeKernel.h | 189 ------------- .../core/NEON/kernels/NEChannelCombineKernel.h | 129 --------- .../core/NEON/kernels/NEChannelExtractKernel.h | 111 -------- .../NEON/kernels/NEChannelShuffleLayerKernel.h | 80 ------ arm_compute/core/NEON/kernels/NECol2ImKernel.h | 115 -------- .../core/NEON/kernels/NEColorConvertKernel.h | 93 ------- 
.../kernels/NEConvertFullyConnectedWeightsKernel.h | 95 ------- .../kernels/NEConvertQuantizedSignednessKernel.h | 76 ------ .../core/NEON/kernels/NEConvolutionKernel.h | 267 ------------------ arm_compute/core/NEON/kernels/NECopyKernel.h | 78 ------ arm_compute/core/NEON/kernels/NECropKernel.h | 114 -------- .../NEON/kernels/NECumulativeDistributionKernel.h | 83 ------ .../NEON/kernels/NEDepthConcatenateLayerKernel.h | 89 ------ .../core/NEON/kernels/NEDepthConvertLayerKernel.h | 94 ------- .../core/NEON/kernels/NEDepthToSpaceLayerKernel.h | 81 ------ .../NEDepthwiseConvolutionLayerNativeKernel.h | 129 --------- .../NEON/kernels/NEDequantizationLayerKernel.h | 76 ------ arm_compute/core/NEON/kernels/NEDerivativeKernel.h | 98 ------- arm_compute/core/NEON/kernels/NEDilateKernel.h | 53 ---- .../NEON/kernels/NEDirectConvolutionLayerKernel.h | 108 -------- .../NEDirectConvolutionLayerOutputStageKernel.h | 102 ------- .../NEON/kernels/NEElementwiseOperationKernel.h | 214 --------------- .../core/NEON/kernels/NEElementwiseUnaryKernel.h | 103 ------- arm_compute/core/NEON/kernels/NEErodeKernel.h | 53 ---- .../core/NEON/kernels/NEFFTDigitReverseKernel.h | 93 ------- .../core/NEON/kernels/NEFFTRadixStageKernel.h | 103 ------- arm_compute/core/NEON/kernels/NEFFTScaleKernel.h | 84 ------ .../core/NEON/kernels/NEFastCornersKernel.h | 76 ------ arm_compute/core/NEON/kernels/NEFillArrayKernel.h | 77 ------ arm_compute/core/NEON/kernels/NEFillBorderKernel.h | 82 ------ .../core/NEON/kernels/NEFlattenLayerKernel.h | 81 ------ arm_compute/core/NEON/kernels/NEFloorKernel.h | 60 ----- .../NEON/kernels/NEFuseBatchNormalizationKernel.h | 116 -------- .../core/NEON/kernels/NEGEMMAssemblyBaseKernel.h | 89 ------ .../core/NEON/kernels/NEGEMMInterleave4x4Kernel.h | 102 ------- .../NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h | 90 ------- .../kernels/NEGEMMLowpOffsetContributionKernel.h | 103 ------- ...NEGEMMLowpOffsetContributionOutputStageKernel.h | 133 --------- 
.../NEGEMMLowpQuantizeDownInt32ScaleKernel.h | 112 -------- ...antizeDownInt32ToInt16ScaleByFixedPointKernel.h | 116 -------- ...uantizeDownInt32ToInt8ScaleByFixedPointKernel.h | 119 -------- ...antizeDownInt32ToUint8ScaleByFixedPointKernel.h | 119 -------- .../core/NEON/kernels/NEGEMMLowpReductionKernel.h | 170 ------------ .../core/NEON/kernels/NEGEMMMatrixAdditionKernel.h | 96 ------- .../core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h | 94 ------- .../core/NEON/kernels/NEGEMMTranspose1xWKernel.h | 95 ------- arm_compute/core/NEON/kernels/NEGatherKernel.h | 113 -------- .../core/NEON/kernels/NEGaussian3x3Kernel.h | 54 ---- .../core/NEON/kernels/NEGaussian5x5Kernel.h | 81 ------ .../core/NEON/kernels/NEGaussianPyramidKernel.h | 105 -------- .../NEON/kernels/NEGenerateProposalsLayerKernel.h | 85 ------ .../core/NEON/kernels/NEHOGDescriptorKernel.h | 149 ---------- .../core/NEON/kernels/NEHOGDetectorKernel.h | 89 ------ .../core/NEON/kernels/NEHarrisCornersKernel.h | 105 -------- .../NEON/kernels/NEHeightConcatenateLayerKernel.h | 83 ------ arm_compute/core/NEON/kernels/NEHistogramKernel.h | 135 ---------- arm_compute/core/NEON/kernels/NEIm2ColKernel.h | 139 ---------- .../kernels/NEInstanceNormalizationLayerKernel.h | 96 ------- .../core/NEON/kernels/NEIntegralImageKernel.h | 54 ---- .../core/NEON/kernels/NEL2NormalizeLayerKernel.h | 90 ------- arm_compute/core/NEON/kernels/NELKTrackerKernel.h | 149 ---------- .../NELocallyConnectedMatrixMultiplyKernel.h | 77 ------ .../core/NEON/kernels/NEMagnitudePhaseKernel.h | 101 ------- .../core/NEON/kernels/NEMaxUnpoolingLayerKernel.h | 97 ------- arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h | 83 ------ .../NEON/kernels/NEMeanStdDevNormalizationKernel.h | 98 ------- arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h | 54 ---- arm_compute/core/NEON/kernels/NEMemsetKernel.h | 71 ----- .../core/NEON/kernels/NEMinMaxLayerKernel.h | 90 ------- .../core/NEON/kernels/NEMinMaxLocationKernel.h | 171 ------------ 
.../core/NEON/kernels/NENonLinearFilterKernel.h | 151 ----------- .../NEON/kernels/NENonMaximaSuppression3x3Kernel.h | 108 -------- .../core/NEON/kernels/NENormalizationLayerKernel.h | 108 -------- arm_compute/core/NEON/kernels/NEPadLayerKernel.h | 113 -------- arm_compute/core/NEON/kernels/NEPermuteKernel.h | 102 ------- .../NEON/kernels/NEPixelWiseMultiplicationKernel.h | 186 ------------- .../core/NEON/kernels/NEPoolingLayerKernel.h | 229 ---------------- .../core/NEON/kernels/NEPriorBoxLayerKernel.h | 98 ------- .../NEON/kernels/NEQLSTMLayerNormalizationKernel.h | 137 ---------- .../core/NEON/kernels/NEQuantizationLayerKernel.h | 102 ------- .../core/NEON/kernels/NEROIAlignLayerKernel.h | 101 ------- .../core/NEON/kernels/NEROIPoolingLayerKernel.h | 81 ------ arm_compute/core/NEON/kernels/NERangeKernel.h | 90 ------- .../core/NEON/kernels/NEReductionOperationKernel.h | 92 ------- arm_compute/core/NEON/kernels/NERemapKernel.h | 83 ------ arm_compute/core/NEON/kernels/NEReorgLayerKernel.h | 83 ------ .../core/NEON/kernels/NEReshapeLayerKernel.h | 63 ----- arm_compute/core/NEON/kernels/NEReverseKernel.h | 80 ------ arm_compute/core/NEON/kernels/NEScaleKernel.h | 127 --------- arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h | 86 ------ arm_compute/core/NEON/kernels/NESelectKernel.h | 103 ------- arm_compute/core/NEON/kernels/NESobel3x3Kernel.h | 86 ------ arm_compute/core/NEON/kernels/NESobel5x5Kernel.h | 126 --------- arm_compute/core/NEON/kernels/NESobel7x7Kernel.h | 130 --------- .../core/NEON/kernels/NESoftmaxLayerKernel.h | 139 ---------- .../core/NEON/kernels/NESpaceToBatchLayerKernel.h | 111 -------- .../core/NEON/kernels/NESpaceToDepthLayerKernel.h | 81 ------ arm_compute/core/NEON/kernels/NEStackLayerKernel.h | 93 ------- .../core/NEON/kernels/NEStridedSliceKernel.h | 102 ------- .../core/NEON/kernels/NETableLookupKernel.h | 80 ------ arm_compute/core/NEON/kernels/NEThresholdKernel.h | 84 ------ arm_compute/core/NEON/kernels/NETileKernel.h | 76 ------ 
arm_compute/core/NEON/kernels/NETransposeKernel.h | 90 ------- .../core/NEON/kernels/NEUpsampleLayerKernel.h | 99 ------- arm_compute/core/NEON/kernels/NEWarpKernel.h | 129 --------- .../core/NEON/kernels/NEWeightsReshapeKernel.h | 109 -------- .../NEON/kernels/NEWidthConcatenateLayerKernel.h | 82 ------ arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h | 106 -------- .../core/NEON/kernels/assembly/arm_gemm_local.hpp | 37 --- arm_compute/core/Types.h | 9 + arm_compute/core/utils/misc/Traits.h | 3 +- arm_compute/runtime/CL/functions/CLHarrisCorners.h | 4 +- arm_compute/runtime/IOperator.h | 2 + arm_compute/runtime/ITransformWeights.h | 5 +- arm_compute/runtime/NEON/INEOperator.h | 5 +- arm_compute/runtime/NEON/INESimpleFunction.h | 21 +- .../runtime/NEON/INESimpleFunctionNoBorder.h | 7 +- .../runtime/NEON/functions/NEAbsoluteDifference.h | 18 +- arm_compute/runtime/NEON/functions/NEAccumulate.h | 38 ++- .../runtime/NEON/functions/NEActivationLayer.h | 17 +- .../runtime/NEON/functions/NEArgMinMaxLayer.h | 10 + .../runtime/NEON/functions/NEArithmeticAddition.h | 12 + .../NEON/functions/NEBatchNormalizationLayer.h | 20 +- .../runtime/NEON/functions/NEBatchToSpaceLayer.h | 16 +- arm_compute/runtime/NEON/functions/NEBitwiseAnd.h | 14 +- arm_compute/runtime/NEON/functions/NEBitwiseNot.h | 2 +- arm_compute/runtime/NEON/functions/NEBitwiseOr.h | 2 +- arm_compute/runtime/NEON/functions/NEBitwiseXor.h | 2 +- .../NEON/functions/NEBoundingBoxTransform.h | 7 +- arm_compute/runtime/NEON/functions/NEBox3x3.h | 2 +- arm_compute/runtime/NEON/functions/NECannyEdge.h | 36 +-- arm_compute/runtime/NEON/functions/NECast.h | 5 +- .../runtime/NEON/functions/NEChannelCombine.h | 2 +- .../runtime/NEON/functions/NEChannelExtract.h | 2 +- .../runtime/NEON/functions/NEChannelShuffleLayer.h | 4 +- arm_compute/runtime/NEON/functions/NECol2Im.h | 5 +- .../runtime/NEON/functions/NEColorConvert.h | 2 +- .../runtime/NEON/functions/NEComputeAllAnchors.h | 9 +- 
.../runtime/NEON/functions/NEConcatenateLayer.h | 19 +- .../functions/NEConvertFullyConnectedWeights.h | 17 +- arm_compute/runtime/NEON/functions/NEConvolution.h | 59 +++- .../runtime/NEON/functions/NEConvolutionLayer.h | 13 +- arm_compute/runtime/NEON/functions/NECopy.h | 15 +- arm_compute/runtime/NEON/functions/NECropResize.h | 4 +- .../runtime/NEON/functions/NEDepthConvertLayer.h | 3 + .../runtime/NEON/functions/NEDepthToSpaceLayer.h | 16 +- .../NEON/functions/NEDepthwiseConvolutionLayer.h | 71 ++--- .../runtime/NEON/functions/NEDequantizationLayer.h | 1 + arm_compute/runtime/NEON/functions/NEDerivative.h | 22 +- .../NEON/functions/NEDetectionPostProcessLayer.h | 2 + arm_compute/runtime/NEON/functions/NEDilate.h | 2 +- .../NEON/functions/NEDirectConvolutionLayer.h | 37 ++- .../NEON/functions/NEElementwiseUnaryLayer.h | 2 + .../runtime/NEON/functions/NEEqualizeHistogram.h | 34 ++- arm_compute/runtime/NEON/functions/NEErode.h | 2 +- arm_compute/runtime/NEON/functions/NEFFT1D.h | 38 ++- arm_compute/runtime/NEON/functions/NEFFT2D.h | 12 +- .../runtime/NEON/functions/NEFFTConvolutionLayer.h | 4 +- arm_compute/runtime/NEON/functions/NEFastCorners.h | 36 ++- arm_compute/runtime/NEON/functions/NEFill.h | 3 +- arm_compute/runtime/NEON/functions/NEFillBorder.h | 7 +- .../runtime/NEON/functions/NEFlattenLayer.h | 3 +- arm_compute/runtime/NEON/functions/NEFloor.h | 3 +- .../runtime/NEON/functions/NEFullyConnectedLayer.h | 23 +- .../NEON/functions/NEFuseBatchNormalization.h | 8 +- arm_compute/runtime/NEON/functions/NEGEMM.h | 33 +-- .../NEON/functions/NEGEMMConvolutionLayer.h | 29 +- .../runtime/NEON/functions/NEGEMMInterleave4x4.h | 2 +- .../NEGEMMLowpAssemblyMatrixMultiplyCore.h | 23 +- .../NEON/functions/NEGEMMLowpMatrixMultiplyCore.h | 46 ++-- .../runtime/NEON/functions/NEGEMMLowpOutputStage.h | 50 ++++ .../runtime/NEON/functions/NEGEMMTranspose1xW.h | 16 +- arm_compute/runtime/NEON/functions/NEGather.h | 3 +- arm_compute/runtime/NEON/functions/NEGaussian3x3.h | 2 +- 
arm_compute/runtime/NEON/functions/NEGaussian5x5.h | 27 +- .../runtime/NEON/functions/NEGaussianPyramid.h | 34 ++- .../NEON/functions/NEGenerateProposalsLayer.h | 34 +-- .../runtime/NEON/functions/NEHOGDescriptor.h | 30 ++- arm_compute/runtime/NEON/functions/NEHOGDetector.h | 18 +- arm_compute/runtime/NEON/functions/NEHOGGradient.h | 25 +- .../runtime/NEON/functions/NEHOGMultiDetection.h | 12 +- .../runtime/NEON/functions/NEHarrisCorners.h | 20 +- arm_compute/runtime/NEON/functions/NEHistogram.h | 25 +- arm_compute/runtime/NEON/functions/NEIm2Col.h | 18 +- .../NEON/functions/NEInstanceNormalizationLayer.h | 28 +- .../runtime/NEON/functions/NEIntegralImage.h | 14 +- .../runtime/NEON/functions/NEL2NormalizeLayer.h | 20 +- arm_compute/runtime/NEON/functions/NELSTMLayer.h | 21 +- .../runtime/NEON/functions/NELSTMLayerQuantized.h | 4 +- .../runtime/NEON/functions/NELaplacianPyramid.h | 12 +- .../NEON/functions/NELaplacianReconstruct.h | 12 +- .../NEON/functions/NELocallyConnectedLayer.h | 30 ++- arm_compute/runtime/NEON/functions/NEMagnitude.h | 15 +- .../runtime/NEON/functions/NEMaxUnpoolingLayer.h | 22 +- arm_compute/runtime/NEON/functions/NEMeanStdDev.h | 27 +- .../functions/NEMeanStdDevNormalizationLayer.h | 15 +- arm_compute/runtime/NEON/functions/NEMedian3x3.h | 2 +- .../runtime/NEON/functions/NEMinMaxLocation.h | 20 +- .../runtime/NEON/functions/NENonLinearFilter.h | 2 +- .../NEON/functions/NENonMaximaSuppression3x3.h | 2 +- .../runtime/NEON/functions/NENormalizationLayer.h | 21 +- arm_compute/runtime/NEON/functions/NEOpticalFlow.h | 29 +- arm_compute/runtime/NEON/functions/NEPReluLayer.h | 1 + arm_compute/runtime/NEON/functions/NEPadLayer.h | 38 ++- arm_compute/runtime/NEON/functions/NEPermute.h | 3 +- arm_compute/runtime/NEON/functions/NEPhase.h | 4 +- .../NEON/functions/NEPixelWiseMultiplication.h | 1 + .../runtime/NEON/functions/NEPoolingLayer.h | 24 +- .../runtime/NEON/functions/NEPriorBoxLayer.h | 4 +- arm_compute/runtime/NEON/functions/NEQLSTMLayer.h | 186 
++++++------- .../runtime/NEON/functions/NEQuantizationLayer.h | 4 +- arm_compute/runtime/NEON/functions/NERNNLayer.h | 25 +- .../runtime/NEON/functions/NEROIAlignLayer.h | 6 +- .../runtime/NEON/functions/NEROIPoolingLayer.h | 18 +- arm_compute/runtime/NEON/functions/NERange.h | 18 +- arm_compute/runtime/NEON/functions/NEReduceMean.h | 11 +- .../runtime/NEON/functions/NEReductionOperation.h | 27 +- arm_compute/runtime/NEON/functions/NERemap.h | 2 +- arm_compute/runtime/NEON/functions/NEReorgLayer.h | 3 +- .../runtime/NEON/functions/NEReshapeLayer.h | 13 +- arm_compute/runtime/NEON/functions/NEReverse.h | 3 +- arm_compute/runtime/NEON/functions/NEScale.h | 2 +- arm_compute/runtime/NEON/functions/NESelect.h | 7 +- arm_compute/runtime/NEON/functions/NESobel3x3.h | 2 +- arm_compute/runtime/NEON/functions/NESobel5x5.h | 29 +- arm_compute/runtime/NEON/functions/NESobel7x7.h | 29 +- .../runtime/NEON/functions/NESoftmaxLayer.h | 31 ++- .../runtime/NEON/functions/NESpaceToBatchLayer.h | 16 +- .../runtime/NEON/functions/NESpaceToDepthLayer.h | 13 +- arm_compute/runtime/NEON/functions/NEStackLayer.h | 22 +- arm_compute/runtime/NEON/functions/NETableLookup.h | 2 +- arm_compute/runtime/NEON/functions/NEThreshold.h | 1 + arm_compute/runtime/NEON/functions/NETile.h | 3 +- arm_compute/runtime/NEON/functions/NETranspose.h | 3 +- arm_compute/runtime/NEON/functions/NEUnstack.h | 10 + .../runtime/NEON/functions/NEUpsampleLayer.h | 18 +- arm_compute/runtime/NEON/functions/NEWarpAffine.h | 2 +- .../runtime/NEON/functions/NEWarpPerspective.h | 2 +- .../NEON/functions/NEWinogradConvolutionLayer.h | 20 +- arm_compute/runtime/NEON/functions/NEYOLOLayer.h | 4 +- docs/ComputeLibrary.dir | 6 +- examples/neon_cartoon_effect.cpp | 2 +- src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp | 4 +- src/core/NEON/INEKernel.h | 34 +++ src/core/NEON/INESimpleKernel.h | 34 +++ src/core/NEON/NEKernels.h | 151 +++++++++++ src/core/NEON/NETracePoint.cpp | 2 +- .../NEON/kernels/NEAbsoluteDifferenceKernel.cpp 
| 2 +- src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h | 86 ++++++ src/core/NEON/kernels/NEAccumulateKernel.cpp | 8 +- src/core/NEON/kernels/NEAccumulateKernel.h | 183 +++++++++++++ src/core/NEON/kernels/NEActivationLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEActivationLayerKernel.h | 87 ++++++ .../NEON/kernels/NEArithmeticAdditionKernel.cpp | 2 +- src/core/NEON/kernels/NEArithmeticAdditionKernel.h | 106 ++++++++ .../NEON/kernels/NEArithmeticSubtractionKernel.cpp | 2 +- .../NEON/kernels/NEArithmeticSubtractionKernel.h | 118 ++++++++ .../NEON/kernels/NEBatchConcatenateLayerKernel.cpp | 2 +- .../NEON/kernels/NEBatchConcatenateLayerKernel.h | 89 ++++++ .../kernels/NEBatchNormalizationLayerKernel.cpp | 2 +- .../NEON/kernels/NEBatchNormalizationLayerKernel.h | 139 ++++++++++ .../NEON/kernels/NEBatchToSpaceLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h | 101 +++++++ src/core/NEON/kernels/NEBitwiseAndKernel.cpp | 2 +- src/core/NEON/kernels/NEBitwiseAndKernel.h | 74 +++++ src/core/NEON/kernels/NEBitwiseNotKernel.cpp | 2 +- src/core/NEON/kernels/NEBitwiseNotKernel.h | 72 +++++ src/core/NEON/kernels/NEBitwiseOrKernel.cpp | 2 +- src/core/NEON/kernels/NEBitwiseOrKernel.h | 74 +++++ src/core/NEON/kernels/NEBitwiseXorKernel.cpp | 2 +- src/core/NEON/kernels/NEBitwiseXorKernel.h | 74 +++++ .../NEON/kernels/NEBoundingBoxTransformKernel.cpp | 2 +- .../NEON/kernels/NEBoundingBoxTransformKernel.h | 95 +++++++ src/core/NEON/kernels/NEBox3x3Kernel.cpp | 4 +- src/core/NEON/kernels/NEBox3x3Kernel.h | 95 +++++++ src/core/NEON/kernels/NECannyEdgeKernel.cpp | 15 +- src/core/NEON/kernels/NECannyEdgeKernel.h | 189 +++++++++++++ src/core/NEON/kernels/NEChannelCombineKernel.cpp | 2 +- src/core/NEON/kernels/NEChannelCombineKernel.h | 129 +++++++++ src/core/NEON/kernels/NEChannelExtractKernel.cpp | 4 +- src/core/NEON/kernels/NEChannelExtractKernel.h | 111 ++++++++ .../NEON/kernels/NEChannelShuffleLayerKernel.cpp | 2 +- .../NEON/kernels/NEChannelShuffleLayerKernel.h 
| 80 ++++++ src/core/NEON/kernels/NECol2ImKernel.cpp | 2 +- src/core/NEON/kernels/NECol2ImKernel.h | 115 ++++++++ src/core/NEON/kernels/NEColorConvertKernel.cpp | 2 +- src/core/NEON/kernels/NEColorConvertKernel.h | 93 +++++++ .../NEConvertFullyConnectedWeightsKernel.cpp | 2 +- .../kernels/NEConvertFullyConnectedWeightsKernel.h | 95 +++++++ .../kernels/NEConvertQuantizedSignednessKernel.cpp | 2 +- .../kernels/NEConvertQuantizedSignednessKernel.h | 78 ++++++ src/core/NEON/kernels/NEConvolutionKernel.cpp | 2 +- src/core/NEON/kernels/NEConvolutionKernel.h | 299 +++++++++++++++++++++ src/core/NEON/kernels/NECopyKernel.cpp | 2 +- src/core/NEON/kernels/NECopyKernel.h | 80 ++++++ src/core/NEON/kernels/NECropKernel.cpp | 2 +- src/core/NEON/kernels/NECropKernel.h | 114 ++++++++ .../kernels/NECumulativeDistributionKernel.cpp | 2 +- .../NEON/kernels/NECumulativeDistributionKernel.h | 85 ++++++ .../NEON/kernels/NEDepthConcatenateLayerKernel.cpp | 2 +- .../NEON/kernels/NEDepthConcatenateLayerKernel.h | 89 ++++++ .../NEON/kernels/NEDepthConvertLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEDepthConvertLayerKernel.h | 96 +++++++ .../NEON/kernels/NEDepthToSpaceLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h | 81 ++++++ .../NEDepthwiseConvolutionLayerNativeKernel.cpp | 2 +- .../NEDepthwiseConvolutionLayerNativeKernel.h | 131 +++++++++ .../NEON/kernels/NEDequantizationLayerKernel.cpp | 2 +- .../NEON/kernels/NEDequantizationLayerKernel.h | 76 ++++++ src/core/NEON/kernels/NEDerivativeKernel.cpp | 2 +- src/core/NEON/kernels/NEDerivativeKernel.h | 100 +++++++ src/core/NEON/kernels/NEDilateKernel.cpp | 4 +- src/core/NEON/kernels/NEDilateKernel.h | 65 +++++ .../kernels/NEDirectConvolutionLayerKernel.cpp | 2 +- .../NEON/kernels/NEDirectConvolutionLayerKernel.h | 108 ++++++++ .../NEDirectConvolutionLayerOutputStageKernel.cpp | 2 +- .../NEDirectConvolutionLayerOutputStageKernel.h | 102 +++++++ .../NEON/kernels/NEElementwiseOperationKernel.cpp | 2 +- 
.../NEON/kernels/NEElementwiseOperationKernel.h | 214 +++++++++++++++ src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp | 2 +- src/core/NEON/kernels/NEElementwiseUnaryKernel.h | 103 +++++++ src/core/NEON/kernels/NEErodeKernel.cpp | 4 +- src/core/NEON/kernels/NEErodeKernel.h | 65 +++++ src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp | 2 +- src/core/NEON/kernels/NEFFTDigitReverseKernel.h | 93 +++++++ src/core/NEON/kernels/NEFFTRadixStageKernel.cpp | 2 +- src/core/NEON/kernels/NEFFTRadixStageKernel.h | 103 +++++++ src/core/NEON/kernels/NEFFTScaleKernel.cpp | 2 +- src/core/NEON/kernels/NEFFTScaleKernel.h | 84 ++++++ src/core/NEON/kernels/NEFastCornersKernel.cpp | 2 +- src/core/NEON/kernels/NEFastCornersKernel.h | 78 ++++++ src/core/NEON/kernels/NEFillArrayKernel.cpp | 2 +- src/core/NEON/kernels/NEFillArrayKernel.h | 77 ++++++ src/core/NEON/kernels/NEFillBorderKernel.cpp | 3 +- src/core/NEON/kernels/NEFillBorderKernel.h | 82 ++++++ src/core/NEON/kernels/NEFlattenLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEFlattenLayerKernel.h | 81 ++++++ src/core/NEON/kernels/NEFloorKernel.cpp | 4 +- src/core/NEON/kernels/NEFloorKernel.h | 72 +++++ .../kernels/NEFuseBatchNormalizationKernel.cpp | 2 +- .../NEON/kernels/NEFuseBatchNormalizationKernel.h | 116 ++++++++ src/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h | 89 ++++++ .../NEON/kernels/NEGEMMInterleave4x4Kernel.cpp | 4 +- src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h | 112 ++++++++ .../kernels/NEGEMMLowpMatrixMultiplyKernel.cpp | 2 +- .../NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h | 92 +++++++ .../kernels/NEGEMMLowpOffsetContributionKernel.cpp | 2 +- .../kernels/NEGEMMLowpOffsetContributionKernel.h | 105 ++++++++ ...GEMMLowpOffsetContributionOutputStageKernel.cpp | 2 +- ...NEGEMMLowpOffsetContributionOutputStageKernel.h | 135 ++++++++++ .../NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp | 2 +- .../NEGEMMLowpQuantizeDownInt32ScaleKernel.h | 114 ++++++++ ...tizeDownInt32ToInt16ScaleByFixedPointKernel.cpp | 2 +- 
...antizeDownInt32ToInt16ScaleByFixedPointKernel.h | 118 ++++++++ ...ntizeDownInt32ToInt8ScaleByFixedPointKernel.cpp | 2 +- ...uantizeDownInt32ToInt8ScaleByFixedPointKernel.h | 121 +++++++++ ...tizeDownInt32ToUint8ScaleByFixedPointKernel.cpp | 2 +- ...antizeDownInt32ToUint8ScaleByFixedPointKernel.h | 121 +++++++++ .../NEON/kernels/NEGEMMLowpReductionKernel.cpp | 2 +- src/core/NEON/kernels/NEGEMMLowpReductionKernel.h | 196 ++++++++++++++ .../NEON/kernels/NEGEMMMatrixAdditionKernel.cpp | 2 +- src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h | 98 +++++++ .../NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp | 2 +- src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h | 94 +++++++ src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp | 4 +- src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h | 107 ++++++++ src/core/NEON/kernels/NEGatherKernel.cpp | 2 +- src/core/NEON/kernels/NEGatherKernel.h | 113 ++++++++ src/core/NEON/kernels/NEGaussian3x3Kernel.cpp | 4 +- src/core/NEON/kernels/NEGaussian3x3Kernel.h | 66 +++++ src/core/NEON/kernels/NEGaussian5x5Kernel.cpp | 8 +- src/core/NEON/kernels/NEGaussian5x5Kernel.h | 103 +++++++ src/core/NEON/kernels/NEGaussianPyramidKernel.cpp | 4 +- src/core/NEON/kernels/NEGaussianPyramidKernel.h | 105 ++++++++ .../kernels/NEGenerateProposalsLayerKernel.cpp | 2 +- .../NEON/kernels/NEGenerateProposalsLayerKernel.h | 85 ++++++ src/core/NEON/kernels/NEHOGDescriptorKernel.cpp | 2 +- src/core/NEON/kernels/NEHOGDescriptorKernel.h | 149 ++++++++++ src/core/NEON/kernels/NEHOGDetectorKernel.cpp | 2 +- src/core/NEON/kernels/NEHOGDetectorKernel.h | 89 ++++++ src/core/NEON/kernels/NEHarrisCornersKernel.cpp | 2 +- src/core/NEON/kernels/NEHarrisCornersKernel.h | 105 ++++++++ .../kernels/NEHeightConcatenateLayerKernel.cpp | 2 +- .../NEON/kernels/NEHeightConcatenateLayerKernel.h | 83 ++++++ src/core/NEON/kernels/NEHistogramKernel.cpp | 2 +- src/core/NEON/kernels/NEHistogramKernel.h | 135 ++++++++++ src/core/NEON/kernels/NEIm2ColKernel.cpp | 2 +- 
src/core/NEON/kernels/NEIm2ColKernel.h | 139 ++++++++++ .../kernels/NEInstanceNormalizationLayerKernel.cpp | 2 +- .../kernels/NEInstanceNormalizationLayerKernel.h | 96 +++++++ src/core/NEON/kernels/NEIntegralImageKernel.cpp | 2 +- src/core/NEON/kernels/NEIntegralImageKernel.h | 66 +++++ src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEL2NormalizeLayerKernel.h | 90 +++++++ src/core/NEON/kernels/NELKTrackerKernel.cpp | 2 +- src/core/NEON/kernels/NELKTrackerKernel.h | 141 ++++++++++ .../NELocallyConnectedMatrixMultiplyKernel.cpp | 2 +- .../NELocallyConnectedMatrixMultiplyKernel.h | 79 ++++++ src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp | 2 +- src/core/NEON/kernels/NEMagnitudePhaseKernel.h | 101 +++++++ .../NEON/kernels/NEMaxUnpoolingLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h | 97 +++++++ src/core/NEON/kernels/NEMeanStdDevKernel.cpp | 2 +- src/core/NEON/kernels/NEMeanStdDevKernel.h | 83 ++++++ .../kernels/NEMeanStdDevNormalizationKernel.cpp | 2 +- .../NEON/kernels/NEMeanStdDevNormalizationKernel.h | 98 +++++++ src/core/NEON/kernels/NEMedian3x3Kernel.cpp | 4 +- src/core/NEON/kernels/NEMedian3x3Kernel.h | 66 +++++ src/core/NEON/kernels/NEMemsetKernel.cpp | 2 +- src/core/NEON/kernels/NEMemsetKernel.h | 71 +++++ src/core/NEON/kernels/NEMinMaxLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEMinMaxLayerKernel.h | 90 +++++++ src/core/NEON/kernels/NEMinMaxLocationKernel.cpp | 2 +- src/core/NEON/kernels/NEMinMaxLocationKernel.h | 171 ++++++++++++ src/core/NEON/kernels/NENonLinearFilterKernel.cpp | 2 +- src/core/NEON/kernels/NENonLinearFilterKernel.h | 153 +++++++++++ .../kernels/NENonMaximaSuppression3x3Kernel.cpp | 2 +- .../NEON/kernels/NENonMaximaSuppression3x3Kernel.h | 108 ++++++++ .../NEON/kernels/NENormalizationLayerKernel.cpp | 2 +- src/core/NEON/kernels/NENormalizationLayerKernel.h | 108 ++++++++ src/core/NEON/kernels/NEPadLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEPadLayerKernel.h | 113 ++++++++ 
src/core/NEON/kernels/NEPermuteKernel.cpp | 4 +- src/core/NEON/kernels/NEPermuteKernel.h | 102 +++++++ .../kernels/NEPixelWiseMultiplicationKernel.cpp | 2 +- .../NEON/kernels/NEPixelWiseMultiplicationKernel.h | 186 +++++++++++++ src/core/NEON/kernels/NEPoolingLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEPoolingLayerKernel.h | 229 ++++++++++++++++ src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEPriorBoxLayerKernel.h | 100 +++++++ .../kernels/NEQLSTMLayerNormalizationKernel.cpp | 2 +- .../NEON/kernels/NEQLSTMLayerNormalizationKernel.h | 146 ++++++++++ .../NEON/kernels/NEQuantizationLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEQuantizationLayerKernel.h | 102 +++++++ src/core/NEON/kernels/NEROIAlignLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEROIAlignLayerKernel.h | 101 +++++++ src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEROIPoolingLayerKernel.h | 81 ++++++ src/core/NEON/kernels/NERangeKernel.cpp | 2 +- src/core/NEON/kernels/NERangeKernel.h | 90 +++++++ .../NEON/kernels/NEReductionOperationKernel.cpp | 4 +- src/core/NEON/kernels/NEReductionOperationKernel.h | 92 +++++++ src/core/NEON/kernels/NERemapKernel.cpp | 2 +- src/core/NEON/kernels/NERemapKernel.h | 83 ++++++ src/core/NEON/kernels/NEReorgLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEReorgLayerKernel.h | 83 ++++++ src/core/NEON/kernels/NEReshapeLayerKernel.cpp | 4 +- src/core/NEON/kernels/NEReshapeLayerKernel.h | 75 ++++++ src/core/NEON/kernels/NEReverseKernel.cpp | 2 +- src/core/NEON/kernels/NEReverseKernel.h | 80 ++++++ src/core/NEON/kernels/NEScaleKernel.cpp | 2 +- src/core/NEON/kernels/NEScaleKernel.h | 127 +++++++++ src/core/NEON/kernels/NEScharr3x3Kernel.cpp | 2 +- src/core/NEON/kernels/NEScharr3x3Kernel.h | 86 ++++++ src/core/NEON/kernels/NESelectKernel.cpp | 2 +- src/core/NEON/kernels/NESelectKernel.h | 103 +++++++ src/core/NEON/kernels/NESobel3x3Kernel.cpp | 2 +- src/core/NEON/kernels/NESobel3x3Kernel.h | 86 ++++++ 
src/core/NEON/kernels/NESobel5x5Kernel.cpp | 2 +- src/core/NEON/kernels/NESobel5x5Kernel.h | 126 +++++++++ src/core/NEON/kernels/NESobel7x7Kernel.cpp | 2 +- src/core/NEON/kernels/NESobel7x7Kernel.h | 130 +++++++++ src/core/NEON/kernels/NESoftmaxLayerKernel.cpp | 2 +- src/core/NEON/kernels/NESoftmaxLayerKernel.h | 149 ++++++++++ .../NEON/kernels/NESpaceToBatchLayerKernel.cpp | 2 +- src/core/NEON/kernels/NESpaceToBatchLayerKernel.h | 111 ++++++++ .../NEON/kernels/NESpaceToDepthLayerKernel.cpp | 2 +- src/core/NEON/kernels/NESpaceToDepthLayerKernel.h | 81 ++++++ src/core/NEON/kernels/NEStackLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEStackLayerKernel.h | 93 +++++++ src/core/NEON/kernels/NEStridedSliceKernel.cpp | 2 +- src/core/NEON/kernels/NEStridedSliceKernel.h | 102 +++++++ src/core/NEON/kernels/NETableLookupKernel.cpp | 4 +- src/core/NEON/kernels/NETableLookupKernel.h | 82 ++++++ src/core/NEON/kernels/NEThresholdKernel.cpp | 2 +- src/core/NEON/kernels/NEThresholdKernel.h | 88 ++++++ src/core/NEON/kernels/NETileKernel.cpp | 2 +- src/core/NEON/kernels/NETileKernel.h | 78 ++++++ src/core/NEON/kernels/NETransposeKernel.cpp | 2 +- src/core/NEON/kernels/NETransposeKernel.h | 90 +++++++ src/core/NEON/kernels/NEUpsampleLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEUpsampleLayerKernel.h | 99 +++++++ src/core/NEON/kernels/NEWarpKernel.cpp | 2 +- src/core/NEON/kernels/NEWarpKernel.h | 131 +++++++++ src/core/NEON/kernels/NEWeightsReshapeKernel.cpp | 2 +- src/core/NEON/kernels/NEWeightsReshapeKernel.h | 109 ++++++++ .../NEON/kernels/NEWidthConcatenateLayerKernel.cpp | 2 +- .../NEON/kernels/NEWidthConcatenateLayerKernel.h | 82 ++++++ .../kernels/NEWinogradConvolutionLayerKernel.h | 2 +- src/core/NEON/kernels/NEYOLOLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEYOLOLayerKernel.h | 106 ++++++++ .../NEON/kernels/assembly/INEGEMMWrapperKernel.h | 2 +- .../NEDepthwiseConvolutionAssemblyKernelWrapper.h | 2 +- .../kernels/assembly/NEGEMMAssemblyWrapperKernel.h | 2 +- 
src/core/TracePoint.cpp | 2 +- src/graph/backends/NEON/NEFunctionFactory.cpp | 1 + src/graph/backends/NEON/NENodeValidator.cpp | 13 + src/runtime/NEON/INEOperator.cpp | 4 + src/runtime/NEON/INESimpleFunction.cpp | 12 +- src/runtime/NEON/INESimpleFunctionNoBorder.cpp | 4 + .../NEON/functions/NEAbsoluteDifference.cpp | 7 +- src/runtime/NEON/functions/NEAccumulate.cpp | 11 +- src/runtime/NEON/functions/NEActivationLayer.cpp | 4 +- src/runtime/NEON/functions/NEArgMinMaxLayer.cpp | 3 + .../NEON/functions/NEArithmeticAddition.cpp | 4 +- .../NEON/functions/NEArithmeticSubtraction.cpp | 2 +- .../NEON/functions/NEBatchNormalizationLayer.cpp | 11 +- src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp | 1 + src/runtime/NEON/functions/NEBitwiseAnd.cpp | 2 +- src/runtime/NEON/functions/NEBitwiseNot.cpp | 2 +- src/runtime/NEON/functions/NEBitwiseOr.cpp | 2 +- src/runtime/NEON/functions/NEBitwiseXor.cpp | 2 +- .../NEON/functions/NEBoundingBoxTransform.cpp | 1 + src/runtime/NEON/functions/NEBox3x3.cpp | 12 +- src/runtime/NEON/functions/NECannyEdge.cpp | 31 ++- src/runtime/NEON/functions/NECast.cpp | 2 +- src/runtime/NEON/functions/NEChannelCombine.cpp | 2 +- src/runtime/NEON/functions/NEChannelExtract.cpp | 2 +- .../NEON/functions/NEChannelShuffleLayer.cpp | 2 +- src/runtime/NEON/functions/NECol2Im.cpp | 2 +- src/runtime/NEON/functions/NEColorConvert.cpp | 2 +- src/runtime/NEON/functions/NEComputeAllAnchors.cpp | 1 + src/runtime/NEON/functions/NEConcatenateLayer.cpp | 8 +- .../functions/NEConvertFullyConnectedWeights.cpp | 11 +- src/runtime/NEON/functions/NEConvolution.cpp | 48 +++- src/runtime/NEON/functions/NEConvolutionLayer.cpp | 21 ++ src/runtime/NEON/functions/NECopy.cpp | 4 +- src/runtime/NEON/functions/NECropResize.cpp | 3 + .../NEON/functions/NEDeconvolutionLayer.cpp | 1 + src/runtime/NEON/functions/NEDepthConvertLayer.cpp | 2 +- src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp | 1 + .../NEON/functions/NEDepthwiseConvolutionLayer.cpp | 14 +- 
.../NEON/functions/NEDequantizationLayer.cpp | 2 +- src/runtime/NEON/functions/NEDerivative.cpp | 22 +- src/runtime/NEON/functions/NEDilate.cpp | 8 +- .../NEON/functions/NEDirectConvolutionLayer.cpp | 23 +- .../NEON/functions/NEElementwiseOperators.cpp | 2 +- .../NEON/functions/NEElementwiseUnaryLayer.cpp | 2 +- src/runtime/NEON/functions/NEEqualizeHistogram.cpp | 28 +- src/runtime/NEON/functions/NEErode.cpp | 13 +- src/runtime/NEON/functions/NEFFT1D.cpp | 21 +- src/runtime/NEON/functions/NEFFT2D.cpp | 5 + .../NEON/functions/NEFFTConvolutionLayer.cpp | 7 + src/runtime/NEON/functions/NEFastCorners.cpp | 35 ++- src/runtime/NEON/functions/NEFill.cpp | 1 + src/runtime/NEON/functions/NEFillBorder.cpp | 9 +- src/runtime/NEON/functions/NEFlattenLayer.cpp | 2 +- src/runtime/NEON/functions/NEFloor.cpp | 2 +- .../NEON/functions/NEFullyConnectedLayer.cpp | 21 +- .../NEON/functions/NEFuseBatchNormalization.cpp | 11 +- src/runtime/NEON/functions/NEGEMM.cpp | 32 ++- .../NEON/functions/NEGEMMConvolutionLayer.cpp | 33 ++- src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp | 2 +- .../NEGEMMLowpAssemblyMatrixMultiplyCore.cpp | 11 +- .../functions/NEGEMMLowpMatrixMultiplyCore.cpp | 75 ++++-- .../NEON/functions/NEGEMMLowpOutputStage.cpp | 16 +- src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp | 2 +- src/runtime/NEON/functions/NEGather.cpp | 2 +- src/runtime/NEON/functions/NEGaussian3x3.cpp | 13 +- src/runtime/NEON/functions/NEGaussian5x5.cpp | 27 +- src/runtime/NEON/functions/NEGaussianPyramid.cpp | 32 ++- .../NEON/functions/NEGenerateProposalsLayer.cpp | 63 +++-- src/runtime/NEON/functions/NEHOGDescriptor.cpp | 21 +- src/runtime/NEON/functions/NEHOGDetector.cpp | 7 +- src/runtime/NEON/functions/NEHOGGradient.cpp | 9 +- src/runtime/NEON/functions/NEHOGMultiDetection.cpp | 8 +- src/runtime/NEON/functions/NEHarrisCorners.cpp | 22 +- src/runtime/NEON/functions/NEHistogram.cpp | 12 +- src/runtime/NEON/functions/NEIm2Col.cpp | 9 +- .../functions/NEInstanceNormalizationLayer.cpp | 12 +- 
src/runtime/NEON/functions/NEIntegralImage.cpp | 13 +- src/runtime/NEON/functions/NEL2NormalizeLayer.cpp | 11 +- src/runtime/NEON/functions/NELSTMLayer.cpp | 22 +- .../NEON/functions/NELSTMLayerQuantized.cpp | 11 + src/runtime/NEON/functions/NELaplacianPyramid.cpp | 9 +- .../NEON/functions/NELaplacianReconstruct.cpp | 6 +- .../NEON/functions/NELocallyConnectedLayer.cpp | 36 ++- src/runtime/NEON/functions/NEMagnitude.cpp | 7 +- src/runtime/NEON/functions/NEMaxUnpoolingLayer.cpp | 15 +- src/runtime/NEON/functions/NEMeanStdDev.cpp | 21 +- .../functions/NEMeanStdDevNormalizationLayer.cpp | 4 +- src/runtime/NEON/functions/NEMedian3x3.cpp | 13 +- src/runtime/NEON/functions/NEMinMaxLocation.cpp | 22 +- src/runtime/NEON/functions/NENonLinearFilter.cpp | 13 +- .../NEON/functions/NENonMaximaSuppression3x3.cpp | 14 +- .../NEON/functions/NENormalizationLayer.cpp | 9 +- src/runtime/NEON/functions/NEOpticalFlow.cpp | 22 +- src/runtime/NEON/functions/NEPReluLayer.cpp | 2 +- src/runtime/NEON/functions/NEPadLayer.cpp | 15 +- src/runtime/NEON/functions/NEPermute.cpp | 2 +- src/runtime/NEON/functions/NEPhase.cpp | 7 +- .../NEON/functions/NEPixelWiseMultiplication.cpp | 2 +- src/runtime/NEON/functions/NEPoolingLayer.cpp | 21 +- src/runtime/NEON/functions/NEPriorBoxLayer.cpp | 1 + src/runtime/NEON/functions/NEQLSTMLayer.cpp | 108 ++++++-- src/runtime/NEON/functions/NEQuantizationLayer.cpp | 1 + src/runtime/NEON/functions/NERNNLayer.cpp | 20 +- src/runtime/NEON/functions/NEROIAlignLayer.cpp | 3 +- src/runtime/NEON/functions/NEROIPoolingLayer.cpp | 12 +- src/runtime/NEON/functions/NERange.cpp | 11 +- src/runtime/NEON/functions/NEReduceMean.cpp | 3 + .../NEON/functions/NEReductionOperation.cpp | 9 +- src/runtime/NEON/functions/NERemap.cpp | 15 +- src/runtime/NEON/functions/NEReorgLayer.cpp | 2 +- src/runtime/NEON/functions/NEReshapeLayer.cpp | 4 +- src/runtime/NEON/functions/NEReverse.cpp | 2 +- src/runtime/NEON/functions/NEScale.cpp | 1 + src/runtime/NEON/functions/NEScharr3x3.cpp | 8 +- 
src/runtime/NEON/functions/NESelect.cpp | 2 +- src/runtime/NEON/functions/NESlice.cpp | 2 +- src/runtime/NEON/functions/NESobel3x3.cpp | 13 +- src/runtime/NEON/functions/NESobel5x5.cpp | 34 ++- src/runtime/NEON/functions/NESobel7x7.cpp | 33 ++- src/runtime/NEON/functions/NESoftmaxLayer.cpp | 30 ++- src/runtime/NEON/functions/NESpaceToBatchLayer.cpp | 27 +- src/runtime/NEON/functions/NESpaceToDepthLayer.cpp | 11 +- src/runtime/NEON/functions/NEStackLayer.cpp | 9 +- src/runtime/NEON/functions/NEStridedSlice.cpp | 2 +- src/runtime/NEON/functions/NETableLookup.cpp | 2 +- src/runtime/NEON/functions/NEThreshold.cpp | 2 +- src/runtime/NEON/functions/NETile.cpp | 2 +- src/runtime/NEON/functions/NETranspose.cpp | 2 +- src/runtime/NEON/functions/NEUpsampleLayer.cpp | 12 +- src/runtime/NEON/functions/NEWarpAffine.cpp | 7 +- src/runtime/NEON/functions/NEWarpPerspective.cpp | 12 +- .../NEON/functions/NEWinogradConvolutionLayer.cpp | 4 + src/runtime/NEON/functions/NEYOLOLayer.cpp | 2 +- src/runtime/TracePoint.cpp | 2 +- tests/NEON/Helper.h | 14 +- .../NEON/DepthwiseConvolutionLayerNative.cpp | 2 +- tests/validation/NEON/FillBorder.cpp | 4 +- tests/validation/NEON/GEMM.cpp | 5 +- tests/validation/NEON/QLSTMLayerNormalization.cpp | 2 +- 653 files changed, 15877 insertions(+), 14089 deletions(-) delete mode 100644 arm_compute/core/NEON/INEKernel.h delete mode 100644 arm_compute/core/NEON/INESimpleKernel.h delete mode 100644 arm_compute/core/NEON/NEKernels.h delete mode 100644 arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEAccumulateKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEActivationLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h delete mode 100644 
arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEBox3x3Kernel.h delete mode 100644 arm_compute/core/NEON/kernels/NECannyEdgeKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEChannelCombineKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEChannelExtractKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NECol2ImKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEColorConvertKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEConvolutionKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NECopyKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NECropKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEDerivativeKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEDilateKernel.h 
delete mode 100644 arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEErodeKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEFFTScaleKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEFastCornersKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEFillArrayKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEFillBorderKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEFloorKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h delete mode 100644 
arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGatherKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEHistogramKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEIm2ColKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEIntegralImageKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NELKTrackerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEMemsetKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h delete 
mode 100644 arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h delete mode 100644 arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEPadLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEPermuteKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NERangeKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEReductionOperationKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NERemapKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEReorgLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEReverseKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEScaleKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h delete mode 100644 arm_compute/core/NEON/kernels/NESelectKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NESobel3x3Kernel.h delete mode 100644 arm_compute/core/NEON/kernels/NESobel5x5Kernel.h delete mode 100644 arm_compute/core/NEON/kernels/NESobel7x7Kernel.h delete mode 100644 arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h delete mode 100644 
arm_compute/core/NEON/kernels/NEStackLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEStridedSliceKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NETableLookupKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEThresholdKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NETileKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NETransposeKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEWarpKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/assembly/arm_gemm_local.hpp create mode 100644 src/core/NEON/INEKernel.h create mode 100644 src/core/NEON/INESimpleKernel.h create mode 100644 src/core/NEON/NEKernels.h create mode 100644 src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h create mode 100644 src/core/NEON/kernels/NEAccumulateKernel.h create mode 100644 src/core/NEON/kernels/NEActivationLayerKernel.h create mode 100644 src/core/NEON/kernels/NEArithmeticAdditionKernel.h create mode 100644 src/core/NEON/kernels/NEArithmeticSubtractionKernel.h create mode 100644 src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h create mode 100644 src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h create mode 100644 src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h create mode 100644 src/core/NEON/kernels/NEBitwiseAndKernel.h create mode 100644 src/core/NEON/kernels/NEBitwiseNotKernel.h create mode 100644 src/core/NEON/kernels/NEBitwiseOrKernel.h create mode 100644 src/core/NEON/kernels/NEBitwiseXorKernel.h create mode 100644 src/core/NEON/kernels/NEBoundingBoxTransformKernel.h create mode 100644 src/core/NEON/kernels/NEBox3x3Kernel.h create mode 100644 src/core/NEON/kernels/NECannyEdgeKernel.h create mode 100644 
src/core/NEON/kernels/NEChannelCombineKernel.h create mode 100644 src/core/NEON/kernels/NEChannelExtractKernel.h create mode 100644 src/core/NEON/kernels/NEChannelShuffleLayerKernel.h create mode 100644 src/core/NEON/kernels/NECol2ImKernel.h create mode 100644 src/core/NEON/kernels/NEColorConvertKernel.h create mode 100644 src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h create mode 100644 src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h create mode 100644 src/core/NEON/kernels/NEConvolutionKernel.h create mode 100644 src/core/NEON/kernels/NECopyKernel.h create mode 100644 src/core/NEON/kernels/NECropKernel.h create mode 100644 src/core/NEON/kernels/NECumulativeDistributionKernel.h create mode 100644 src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h create mode 100644 src/core/NEON/kernels/NEDepthConvertLayerKernel.h create mode 100644 src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h create mode 100644 src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h create mode 100644 src/core/NEON/kernels/NEDequantizationLayerKernel.h create mode 100644 src/core/NEON/kernels/NEDerivativeKernel.h create mode 100644 src/core/NEON/kernels/NEDilateKernel.h create mode 100644 src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h create mode 100644 src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h create mode 100644 src/core/NEON/kernels/NEElementwiseOperationKernel.h create mode 100644 src/core/NEON/kernels/NEElementwiseUnaryKernel.h create mode 100644 src/core/NEON/kernels/NEErodeKernel.h create mode 100644 src/core/NEON/kernels/NEFFTDigitReverseKernel.h create mode 100644 src/core/NEON/kernels/NEFFTRadixStageKernel.h create mode 100644 src/core/NEON/kernels/NEFFTScaleKernel.h create mode 100644 src/core/NEON/kernels/NEFastCornersKernel.h create mode 100644 src/core/NEON/kernels/NEFillArrayKernel.h create mode 100644 src/core/NEON/kernels/NEFillBorderKernel.h create mode 100644 src/core/NEON/kernels/NEFlattenLayerKernel.h 
create mode 100644 src/core/NEON/kernels/NEFloorKernel.h create mode 100644 src/core/NEON/kernels/NEFuseBatchNormalizationKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h create mode 100644 src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMLowpReductionKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h create mode 100644 src/core/NEON/kernels/NEGatherKernel.h create mode 100644 src/core/NEON/kernels/NEGaussian3x3Kernel.h create mode 100644 src/core/NEON/kernels/NEGaussian5x5Kernel.h create mode 100644 src/core/NEON/kernels/NEGaussianPyramidKernel.h create mode 100644 src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h create mode 100644 src/core/NEON/kernels/NEHOGDescriptorKernel.h create mode 100644 src/core/NEON/kernels/NEHOGDetectorKernel.h create mode 100644 src/core/NEON/kernels/NEHarrisCornersKernel.h create mode 100644 src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h create mode 100644 src/core/NEON/kernels/NEHistogramKernel.h create mode 100644 src/core/NEON/kernels/NEIm2ColKernel.h create mode 100644 src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h create mode 100644 
src/core/NEON/kernels/NEIntegralImageKernel.h create mode 100644 src/core/NEON/kernels/NEL2NormalizeLayerKernel.h create mode 100644 src/core/NEON/kernels/NELKTrackerKernel.h create mode 100644 src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h create mode 100644 src/core/NEON/kernels/NEMagnitudePhaseKernel.h create mode 100644 src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h create mode 100644 src/core/NEON/kernels/NEMeanStdDevKernel.h create mode 100644 src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h create mode 100644 src/core/NEON/kernels/NEMedian3x3Kernel.h create mode 100644 src/core/NEON/kernels/NEMemsetKernel.h create mode 100644 src/core/NEON/kernels/NEMinMaxLayerKernel.h create mode 100644 src/core/NEON/kernels/NEMinMaxLocationKernel.h create mode 100644 src/core/NEON/kernels/NENonLinearFilterKernel.h create mode 100644 src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h create mode 100644 src/core/NEON/kernels/NENormalizationLayerKernel.h create mode 100644 src/core/NEON/kernels/NEPadLayerKernel.h create mode 100644 src/core/NEON/kernels/NEPermuteKernel.h create mode 100644 src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h create mode 100644 src/core/NEON/kernels/NEPoolingLayerKernel.h create mode 100644 src/core/NEON/kernels/NEPriorBoxLayerKernel.h create mode 100644 src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h create mode 100644 src/core/NEON/kernels/NEQuantizationLayerKernel.h create mode 100644 src/core/NEON/kernels/NEROIAlignLayerKernel.h create mode 100644 src/core/NEON/kernels/NEROIPoolingLayerKernel.h create mode 100644 src/core/NEON/kernels/NERangeKernel.h create mode 100644 src/core/NEON/kernels/NEReductionOperationKernel.h create mode 100644 src/core/NEON/kernels/NERemapKernel.h create mode 100644 src/core/NEON/kernels/NEReorgLayerKernel.h create mode 100644 src/core/NEON/kernels/NEReshapeLayerKernel.h create mode 100644 src/core/NEON/kernels/NEReverseKernel.h create mode 100644 
src/core/NEON/kernels/NEScaleKernel.h create mode 100644 src/core/NEON/kernels/NEScharr3x3Kernel.h create mode 100644 src/core/NEON/kernels/NESelectKernel.h create mode 100644 src/core/NEON/kernels/NESobel3x3Kernel.h create mode 100644 src/core/NEON/kernels/NESobel5x5Kernel.h create mode 100644 src/core/NEON/kernels/NESobel7x7Kernel.h create mode 100644 src/core/NEON/kernels/NESoftmaxLayerKernel.h create mode 100644 src/core/NEON/kernels/NESpaceToBatchLayerKernel.h create mode 100644 src/core/NEON/kernels/NESpaceToDepthLayerKernel.h create mode 100644 src/core/NEON/kernels/NEStackLayerKernel.h create mode 100644 src/core/NEON/kernels/NEStridedSliceKernel.h create mode 100644 src/core/NEON/kernels/NETableLookupKernel.h create mode 100644 src/core/NEON/kernels/NEThresholdKernel.h create mode 100644 src/core/NEON/kernels/NETileKernel.h create mode 100644 src/core/NEON/kernels/NETransposeKernel.h create mode 100644 src/core/NEON/kernels/NEUpsampleLayerKernel.h create mode 100644 src/core/NEON/kernels/NEWarpKernel.h create mode 100644 src/core/NEON/kernels/NEWeightsReshapeKernel.h create mode 100644 src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h create mode 100644 src/core/NEON/kernels/NEYOLOLayerKernel.h diff --git a/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h b/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h index 1a3f2ba679..068b37d80c 100644 --- a/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h +++ b/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h @@ -24,7 +24,7 @@ #ifndef ARM_COMPUTE_CPPBOXWITHNONMAXIMASUPPRESSIONLIMITKERNEL_H #define ARM_COMPUTE_CPPBOXWITHNONMAXIMASUPPRESSIONLIMITKERNEL_H -#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/CPP/ICPPKernel.h" #include "arm_compute/core/Types.h" namespace arm_compute diff --git a/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h 
b/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h index ddb346dfc2..e4fd250a61 100644 --- a/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h +++ b/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h @@ -24,8 +24,8 @@ #ifndef ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H #define ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H +#include "arm_compute/core/CPP/ICPPKernel.h" #include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "support/Mutex.h" @@ -39,7 +39,7 @@ using IImage = ITensor; /** CPP kernel to perform corner candidates */ -class CPPCornerCandidatesKernel : public INEKernel +class CPPCornerCandidatesKernel : public ICPPKernel { public: const char *name() const override diff --git a/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h b/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h index dd6bbd56e0..5275a357b3 100644 --- a/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h +++ b/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,9 +24,9 @@ #ifndef ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H #define ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H +#include "arm_compute/core/CPP/ICPPKernel.h" #include "arm_compute/core/IArray.h" #include "arm_compute/core/IHOG.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Types.h" namespace arm_compute @@ -53,6 +53,8 @@ public: CPPDetectionWindowNonMaximaSuppressionKernel(CPPDetectionWindowNonMaximaSuppressionKernel &&) = default; /** Allow instances of this class to be moved */ CPPDetectionWindowNonMaximaSuppressionKernel &operator=(CPPDetectionWindowNonMaximaSuppressionKernel &&) = default; + /** Default destructor */ + ~CPPDetectionWindowNonMaximaSuppressionKernel() = default; /** Initialise the kernel's input, output and the euclidean minimum distance * * @attention: If @ref IDetectionWindowArray is passed to the kernel, the map() and unmap() methods @ref IDetectionWindowArray must be called respectively before and after diff --git a/arm_compute/core/NEON/INEKernel.h b/arm_compute/core/NEON/INEKernel.h deleted file mode 100644 index 87e17c80b4..0000000000 --- a/arm_compute/core/NEON/INEKernel.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_INEKERNEL_H -#define ARM_COMPUTE_INEKERNEL_H - -#include "arm_compute/core/CPP/ICPPKernel.h" - -namespace arm_compute -{ -/** Common interface for all kernels implemented in NEON. */ -using INEKernel = ICPPKernel; -} // namespace arm_compute -#endif /*ARM_COMPUTE_INEKERNEL_H */ diff --git a/arm_compute/core/NEON/INESimpleKernel.h b/arm_compute/core/NEON/INESimpleKernel.h deleted file mode 100644 index abe15c15c3..0000000000 --- a/arm_compute/core/NEON/INESimpleKernel.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_INESIMPLEKERNEL_H -#define ARM_COMPUTE_INESIMPLEKERNEL_H - -#include "arm_compute/core/CPP/ICPPSimpleKernel.h" - -namespace arm_compute -{ -/** Interface for simple NEON kernels having 1 tensor input and 1 tensor output */ -using INESimpleKernel = ICPPSimpleKernel; -} // namespace arm_compute -#endif /*ARM_COMPUTE_INESIMPLEKERNEL_H */ diff --git a/arm_compute/core/NEON/NEKernels.h b/arm_compute/core/NEON/NEKernels.h deleted file mode 100644 index 4d3e6633c9..0000000000 --- a/arm_compute/core/NEON/NEKernels.h +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEKERNELS_H -#define ARM_COMPUTE_NEKERNELS_H - -/* Header regrouping all the NEON kernels */ -#include "arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h" -#include "arm_compute/core/NEON/kernels/NEAccumulateKernel.h" -#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" -#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h" -#include "arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h" -#include "arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h" -#include "arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h" -#include "arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h" -#include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h" -#include "arm_compute/core/NEON/kernels/NEBox3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h" -#include "arm_compute/core/NEON/kernels/NEChannelCombineKernel.h" -#include "arm_compute/core/NEON/kernels/NEChannelExtractKernel.h" -#include "arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" -#include "arm_compute/core/NEON/kernels/NEColorConvertKernel.h" -#include "arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" -#include "arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" -#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h" -#include 
"arm_compute/core/NEON/kernels/NECopyKernel.h" -#include "arm_compute/core/NEON/kernels/NECropKernel.h" -#include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h" -#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h" -#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h" -#include "arm_compute/core/NEON/kernels/NEDilateKernel.h" -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h" -#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h" -#include "arm_compute/core/NEON/kernels/NEErodeKernel.h" -#include "arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h" -#include "arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEFFTScaleKernel.h" -#include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEFloorKernel.h" -#include "arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" 
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" -#include "arm_compute/core/NEON/kernels/NEGatherKernel.h" -#include "arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h" -#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h" -#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" -#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h" -#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" -#include "arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h" -#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" -#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEIntegralImageKernel.h" -#include "arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" -#include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h" -#include "arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h" -#include 
"arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h" -#include "arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" -#include "arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h" -#include "arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h" -#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEPermuteKernel.h" -#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" -#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h" -#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NERangeKernel.h" -#include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h" -#include "arm_compute/core/NEON/kernels/NERemapKernel.h" -#include "arm_compute/core/NEON/kernels/NEReorgLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEReverseKernel.h" -#include "arm_compute/core/NEON/kernels/NEScaleKernel.h" -#include "arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/NESelectKernel.h" -#include "arm_compute/core/NEON/kernels/NESobel3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/NESobel5x5Kernel.h" -#include "arm_compute/core/NEON/kernels/NESobel7x7Kernel.h" -#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" -#include 
"arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEStackLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h" -#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h" -#include "arm_compute/core/NEON/kernels/NEThresholdKernel.h" -#include "arm_compute/core/NEON/kernels/NETileKernel.h" -#include "arm_compute/core/NEON/kernels/NETransposeKernel.h" -#include "arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEWarpKernel.h" -#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" -#include "arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h" - -#endif /* ARM_COMPUTE_NEKERNELS_H */ diff --git a/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h b/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h deleted file mode 100644 index 894e9277c7..0000000000 --- a/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H -#define ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the absolute difference kernel - * - * Absolute difference is computed by: - * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f] - */ -class NEAbsoluteDifferenceKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEAbsoluteDifferenceKernel"; - } - /** Default constructor */ - NEAbsoluteDifferenceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAbsoluteDifferenceKernel(const NEAbsoluteDifferenceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAbsoluteDifferenceKernel &operator=(const NEAbsoluteDifferenceKernel &) = delete; - /** Allow instances of this class to be moved */ - NEAbsoluteDifferenceKernel(NEAbsoluteDifferenceKernel &&) = default; - /** Allow instances of this class to be moved */ - NEAbsoluteDifferenceKernel &operator=(NEAbsoluteDifferenceKernel &&) = default; - /** Default destructor */ - ~NEAbsoluteDifferenceKernel() = default; - - /** Set the inputs and output tensors - * - * @param[in] input1 Source tensor. Data types supported: U8/S16 - * @param[in] input2 Source tensor. 
Data types supported: U8/S16 - * @param[out] output Destination tensor, Data types supported: U8/S16 - */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised absolute difference functions - * - * @param[in] input1 An input tensor. Data types supported: U8/S16. - * @param[in] input2 An input tensor. Data types supported: U8/S16. - * @param[out] output The output tensor, Data types supported: U8 (Only if both inputs are U8), S16. - * @param[in] window Region on which to execute the kernel. - */ - using AbsDiffFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); - - /** Absolute difference function to use for the particular tensor formats passed to configure() */ - AbsDiffFunction *_func; - const ITensor *_input1; - const ITensor *_input2; - ITensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h deleted file mode 100644 index 2e9935cd79..0000000000 --- a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEACCUMULATEKERNEL_H -#define ARM_COMPUTE_NEACCUMULATEKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Interface for the accumulate kernel - * - * Accumulation is computed by: - * @f[ accum(x,y) = accum(x,y) + input(x,y) @f] - */ -class NEAccumulateKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEAccumulateKernel"; - } - /** Set the input and accumulation tensors - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] accum Destination tensor. Data type supported: S16. 
- */ - void configure(const ITensor *input, ITensor *accum); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; -}; - -/** Interface for the accumulate weighted kernel - * - * Weighted accumulation is computed: - * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f] - * - * Where @f$ 0 \le \alpha \le 1 @f$ - * Conceptually, the rounding for this is defined as: - * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f] -*/ -class NEAccumulateWeightedKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEAccumulateWeightedKernel"; - } - /** Default constructor */ - NEAccumulateWeightedKernel(); - /** Set the input and accumulation tensors, and the scale value - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[in] alpha Scalar value in the range [0.0f, 1.0f] - * @param[in,out] accum Accumulated tensor. Data type supported: U8. 
- */ - void configure(const ITensor *input, float alpha, ITensor *accum); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -protected: - float _alpha; -}; - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** Interface for the accumulate weighted kernel using F16 */ -class NEAccumulateWeightedFP16Kernel : public NEAccumulateWeightedKernel -{ -public: - const char *name() const override - { - return "NEAccumulateWeightedFP16Kernel"; - } - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; -}; -#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -/** Interface for the accumulate weighted kernel using F16 */ -using NEAccumulateWeightedFP16Kernel = NEAccumulateWeightedKernel; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - -/** Interface for the accumulate squared kernel - * - * The accumulation of squares is computed: - * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f] - * - * Where @f$ 0 \le shift \le 15 @f$ -*/ -class NEAccumulateSquaredKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEAccumulateSquaredKernel"; - } - /** Default constructor */ - NEAccumulateSquaredKernel(); - /** Set the input and accumulation tensors and the shift value. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[in] shift Shift value in the range of [0, 15] - * @param[in,out] accum Accumulated tensor. Data type supported: S16. 
- */ - void configure(const ITensor *input, uint32_t shift, ITensor *accum); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - uint32_t _shift; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEACCUMULATEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h deleted file mode 100644 index a62f34cd58..0000000000 --- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H -#define ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/utils/misc/Traits.h" - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -#include -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the activation layer kernel. */ -class NEActivationLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEActivationLayerKernel"; - } - /** Constructor */ - NEActivationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEActivationLayerKernel(const NEActivationLayerKernel &) = delete; - /** Default move constructor */ - NEActivationLayerKernel(NEActivationLayerKernel &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEActivationLayerKernel &operator=(const NEActivationLayerKernel &) = delete; - /** Default move assignment operator */ - NEActivationLayerKernel &operator=(NEActivationLayerKernel &&) = default; - /** Set the input and output tensor. - * - * @note If the output tensor is a nullptr, the activation function will be performed in-place - * - * @param[in, out] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result - * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. - * @param[out] output Destination tensor info. Data type supported: same as @p input - * @param[in] activation_info Activation layer information. - */ - void configure(const ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo activation_info); - /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayerKernel - * - * @param[in] input Source tensor info. 
In case of @p output tensor info = nullptr, this tensor will store the result - * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input - * @param[in] act_info Activation layer information. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; - -private: - ActivationLayerInfo _act_info; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h deleted file mode 100644 index eece5708e8..0000000000 --- a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H -#define ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform addition between two tensors */ -class NEArithmeticAdditionKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEArithmeticAdditionKernel"; - } - /** Default constructor */ - NEArithmeticAdditionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEArithmeticAdditionKernel(const NEArithmeticAdditionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEArithmeticAdditionKernel &operator=(const NEArithmeticAdditionKernel &) = delete; - /** Allow instances of this class to be moved */ - NEArithmeticAdditionKernel(NEArithmeticAdditionKernel &&) = default; - /** Allow instances of this class to be moved */ - NEArithmeticAdditionKernel &operator=(NEArithmeticAdditionKernel &&) = default; - /** Default destructor */ - ~NEArithmeticAdditionKernel() = default; - - /** Initialise the kernel's input, output and border mode. - * - * Valid configurations (Input1,Input2) -> Output : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (S32,S32) -> S32 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - (QASYMM8,QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (QSYMM16,QSYMM16) -> QSYMM16 - * - * @param[in] input1 First input tensor info. 
Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 - * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 - * @param[out] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. - * @param[in] policy Overflow policy. - */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAdditionKernel - * - * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 - * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 - * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. - * @param[in] policy Overflow policy. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised add functions - * - * @param[in] input1 First input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32 - * @param[in] input2 Second input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32 - * @param[out] output The output tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32. - * @param[in] policy Overflow policy. - * @param[in] window Region on which to execute the kernel. 
- */ - using AddFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const Window &window); - /** Add function to use for the particular tensor types passed to configure() */ - AddFunction *_func; - ConvertPolicy _policy; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h deleted file mode 100644 index 7d00d1f7d0..0000000000 --- a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H -#define ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform subtraction between two tensors */ -class NEArithmeticSubtractionKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEArithmeticSubtractionKernel"; - } - /** Default constructor */ - NEArithmeticSubtractionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEArithmeticSubtractionKernel(const NEArithmeticSubtractionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEArithmeticSubtractionKernel &operator=(const NEArithmeticSubtractionKernel &) = delete; - /** Allow instances of this class to be moved */ - NEArithmeticSubtractionKernel(NEArithmeticSubtractionKernel &&) = default; - /** Allow instances of this class to be moved */ - NEArithmeticSubtractionKernel &operator=(NEArithmeticSubtractionKernel &&) = default; - /** Default destructor */ - ~NEArithmeticSubtractionKernel() = default; - - /** Initialise the kernel's input and output. - * - * Valid configurations (Input1,Input2) -> Output : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (QASYMM8, QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED, QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (S32,S32) -> S32 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 - * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 - * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32. - * @param[in] policy Overflow policy. 
Convert policy cannot be WRAP if datatype is quantized. - */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtractionKernel - * - * Valid configurations (Input1,Input2) -> Output : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (QASYMM8, QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED, QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (S32,S32) -> S32 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 - * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 - * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32. - * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised sub functions - * - * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 - * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 - * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32. - * @param[in] window Region on which to execute the kernel. - * @param[in] is_sat Flag to indicate if the policy is SATURATE. 
- */ - using SubFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window, bool is_sat); - /** Sub function to use for the particular tensor types passed to configure() */ - SubFunction *_func; - ConvertPolicy _policy; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h deleted file mode 100644 index 478890925b..0000000000 --- a/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H -#define ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the batch concatenate kernel. - * The input tensor will be concatenated into the output tensor. - */ -class NEBatchConcatenateLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBatchConcatenateLayerKernel"; - } - /** Default constructor */ - NEBatchConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBatchConcatenateLayerKernel(const NEBatchConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBatchConcatenateLayerKernel &operator=(const NEBatchConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEBatchConcatenateLayerKernel(NEBatchConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEBatchConcatenateLayerKernel &operator=(NEBatchConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~NEBatchConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] batch_offset The offset on axis # 3. - * @param[in,out] output Output tensor info. Data types supported: Same as @p input. - * - * @note: The output tensor's low two dimensions can't be smaller than the input one's. - * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. - * - */ - void configure(const ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEBatchConcatenateLayerKernel - * - * @param[in] input Input tensor info. 
Data types supported: All. - * @param[in] batch_offset The offset on axis # 3. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; - -private: - using BatchConcatFunction = void(const ITensor *in, ITensor *out, unsigned int batch_offset, const Window &window); - -private: - BatchConcatFunction *_func; - unsigned int _batch_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h deleted file mode 100644 index 962d2565c0..0000000000 --- a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the batch normalization layer kernel. - */ -class NEBatchNormalizationLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBatchNormalizationLayerKernel"; - } - /** Default constructor */ - NEBatchNormalizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBatchNormalizationLayerKernel(const NEBatchNormalizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBatchNormalizationLayerKernel &operator=(const NEBatchNormalizationLayerKernel &) = delete; - /** Default Move Constructor. */ - NEBatchNormalizationLayerKernel(NEBatchNormalizationLayerKernel &&) = default; - /** Default move assignment operator */ - NEBatchNormalizationLayerKernel &operator=(NEBatchNormalizationLayerKernel &&) = default; - /** Default destructor */ - ~NEBatchNormalizationLayerKernel() = default; - /** Set the input and output tensors. - * - * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place - * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. - * 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * The rest are optional and used for representing batches. Data types supported: F16/F32. - * @param[out] output Destination tensor. 
Output will have the same number of dimensions as input. Data type supported: same as @p input - * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input - * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input - * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. - */ - void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta = nullptr, const ITensor *gamma = nullptr, float epsilon = 0.001f, - ActivationLayerInfo act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEBatchNormalizationLayerKernel - * - * @param[in] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result. - * 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * The rest are optional and used for representing batches. Data types supported: F16/F32. - * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input - * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] var Variance values tensor info. 
1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input - * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input - * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *mean, const ITensorInfo *var, - const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr, - float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Configure execution function in case of non-fused activation **/ - void configure_non_fused(); - /** Configure execution function in case of fused activation **/ - void configure_fused(); - - /** Template function to run batch normalization on fp32 - * - * @tparam T Specialization data type - * @tparam fused_activation Boolean that flags if its a fused activation or not - * @tparam F Activation function functor to run - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). 
- */ - template - void batch_normalization_nchw(const Window &window); - /** Template function to run batch normalization on fp32 on tensors with NHWC format - * - * @tparam T Specialization data type - * @tparam fused_activation Boolean that flags if its a fused activation or not - * @tparam F Activation function functor to run - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void batch_normalization_nhwc(const Window &window); - /** Common signature for all the batch normalization functions - * - * @param[in] window Region on which to execute the kernel. - */ - using BatchNormFunctionPtr = void (NEBatchNormalizationLayerKernel::*)(const Window &window); - -private: - BatchNormFunctionPtr _func; - ITensor *_input; - ITensor *_output; - const ITensor *_mean; - const ITensor *_var; - const ITensor *_gamma; - const ITensor *_beta; - float _epsilon; - ActivationLayerInfo _act_info; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h deleted file mode 100644 index 943577d879..0000000000 --- a/arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H -#define ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the batch to space kernel */ -class NEBatchToSpaceLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBatchToSpaceLayerKernel"; - } - /** Default constructor */ - NEBatchToSpaceLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBatchToSpaceLayerKernel(const NEBatchToSpaceLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBatchToSpaceLayerKernel &operator=(const NEBatchToSpaceLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEBatchToSpaceLayerKernel(NEBatchToSpaceLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEBatchToSpaceLayerKernel &operator=(NEBatchToSpaceLayerKernel &&) = default; - /** Default destructor */ - ~NEBatchToSpaceLayerKernel() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const ITensor *input, const ITensor *block_shape, ITensor *output); - /** Initialise the kernel's inputs and output (Static block shape). - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[out] output Tensor output. 
Data types supported: same as @p input - */ - void configure(const ITensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayerKernel - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[in] output Tensor output. Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayerKernel (Static block shape). - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[in] output Tensor output. 
Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const int32_t block_shape_x, const int32_t block_shape_y, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; /**< Source tensor */ - const ITensor *_block_shape; /**< Block shape tensor */ - ITensor *_output; /**< Destination tensor */ - DataLayout _data_layout; /**< Data layout to be used at run-time */ - - int32_t _block_shape_x; - int32_t _block_shape_y; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h deleted file mode 100644 index 0e4c886d34..0000000000 --- a/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEBITWISEANDKERNEL_H -#define ARM_COMPUTE_NEBITWISEANDKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform bitwise AND between XY-planes of two tensors - * - * Result is computed by: - * @f[ output(x,y) = input1(x,y) \land input2(x,y) @f] - */ -class NEBitwiseAndKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBitwiseAndKernel"; - } - /** Default constructor */ - NEBitwiseAndKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseAndKernel(const NEBitwiseAndKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseAndKernel &operator=(const NEBitwiseAndKernel &) = delete; - /** Allow instances of this class to be moved */ - NEBitwiseAndKernel(NEBitwiseAndKernel &&) = default; - /** Allow instances of this class to be moved */ - NEBitwiseAndKernel &operator=(NEBitwiseAndKernel &&) = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input1 An input tensor. Data type supported: U8. - * @param[in] input2 An input tensor. Data type supported: U8 - * @param[out] output Output tensor. Data type supported: U8. 
- */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input1; /**< Source tensor 1 */ - const ITensor *_input2; /**< Source tensor 2 */ - ITensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEBITWISEANDKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h deleted file mode 100644 index a20fdaec93..0000000000 --- a/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEBITWISENOTKERNEL_H -#define ARM_COMPUTE_NEBITWISENOTKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform bitwise NOT operation - * - * Result is computed by: - * @f[ output(x,y) = \lnot input(x,y) @f] - */ -class NEBitwiseNotKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBitwiseNotKernel"; - } - /** Default constructor */ - NEBitwiseNotKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseNotKernel(const NEBitwiseNotKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseNotKernel &operator=(const NEBitwiseNotKernel &) = delete; - /** Allow instances of this class to be moved */ - NEBitwiseNotKernel(NEBitwiseNotKernel &&) = default; - /** Allow instances of this class to be moved */ - NEBitwiseNotKernel &operator=(NEBitwiseNotKernel &&) = default; - /** Initialise the kernel's input and output - * - * @param[in] input An input tensor. Data type supported: U8. - * @param[out] output The output tensor. Data type supported: U8. - */ - void configure(const ITensor *input, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; /**< Source tensor */ - ITensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEBITWISENOTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h deleted file mode 100644 index 70db5fbeb6..0000000000 --- a/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEBITWISEORKERNEL_H -#define ARM_COMPUTE_NEBITWISEORKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform bitwise inclusive OR between two tensors - * - * Result is computed by: - * @f[ output(x,y) = input1(x,y) \lor input2(x,y) @f] - */ -class NEBitwiseOrKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBitwiseOrKernel"; - } - /** Default constructor */ - NEBitwiseOrKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseOrKernel(const NEBitwiseOrKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseOrKernel &operator=(const NEBitwiseOrKernel &) = delete; - /** Allow instances of this class to be moved */ - NEBitwiseOrKernel(NEBitwiseOrKernel &&) = default; - /** Allow instances of this class to be moved */ - NEBitwiseOrKernel &operator=(NEBitwiseOrKernel &&) = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input1 An input tensor. Data type supported: U8. - * @param[in] input2 An input tensor. Data type supported: U8 - * @param[out] output Output tensor. Data type supported: U8. 
- */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input1; /**< Source tensor 1 */ - const ITensor *_input2; /**< Source tensor 2 */ - ITensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEBITWISEORKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h deleted file mode 100644 index 91f24f1c82..0000000000 --- a/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEBITWISEXORKERNEL_H -#define ARM_COMPUTE_NEBITWISEXORKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform bitwise exclusive OR (XOR) between two tensors - * - * Result is computed by: - * @f[ output(x,y) = input1(x,y) \oplus input2(x,y) @f] - */ -class NEBitwiseXorKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBitwiseXorKernel"; - } - /** Default constructor */ - NEBitwiseXorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseXorKernel(const NEBitwiseXorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseXorKernel &operator=(const NEBitwiseXorKernel &) = delete; - /** Allow instances of this class to be moved */ - NEBitwiseXorKernel(NEBitwiseXorKernel &&) = default; - /** Allow instances of this class to be moved */ - NEBitwiseXorKernel &operator=(NEBitwiseXorKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input1 An input tensor. Data type supported: U8. - * @param[in] input2 An input tensor. Data type supported: U8 - * @param[out] output The output tensor. Data type supported: U8. 
- */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input1; /**< Source tensor 1 */ - const ITensor *_input2; /**< Source tensor 2 */ - ITensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEBITWISEXORKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h b/arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h deleted file mode 100644 index 8b3953a53a..0000000000 --- a/arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H -#define ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the bounding box kernel */ -class NEBoundingBoxTransformKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBoundingBoxTransformKernel"; - } - - /** Default constructor */ - NEBoundingBoxTransformKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBoundingBoxTransformKernel(const NEBoundingBoxTransformKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBoundingBoxTransformKernel &operator=(const NEBoundingBoxTransformKernel &) = delete; - /** Allow instances of this class to be moved */ - NEBoundingBoxTransformKernel(NEBoundingBoxTransformKernel &&) = default; - /** Allow instances of this class to be moved */ - NEBoundingBoxTransformKernel &operator=(NEBoundingBoxTransformKernel &&) = default; - /** Default destructor */ - ~NEBoundingBoxTransformKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. - * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input - * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. - * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input. - * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. - * - * @note Only single image prediction is supported. 
Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. - * - */ - void configure(const ITensor *boxes, ITensor *pred_boxes, const ITensor *deltas, const BoundingBoxTransformInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref CLBoundingBoxTransform - * - * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. - * @param[in] pred_boxes Destination tensor info. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input - * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. - * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input. - * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. - * - * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. 
- * - * @return a Status - */ - static Status validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - template - void internal_run(const Window &window); - - const ITensor *_boxes; - ITensor *_pred_boxes; - const ITensor *_deltas; - BoundingBoxTransformInfo _bbinfo; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h deleted file mode 100644 index 32e991e217..0000000000 --- a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEBOX3x3KERNEL_H -#define ARM_COMPUTE_NEBOX3x3KERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a Box 3x3 filter */ -class NEBox3x3Kernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEBox3x3Kernel"; - } - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data type supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** NEON kernel to perform a Box 3x3 filter for FP16 datatype - */ -class NEBox3x3FP16Kernel : public NEBox3x3Kernel -{ -public: - const char *name() const override - { - return "NEBox3x3FP16Kernel"; - } - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; -}; -#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -/** NEON kernel to perform a Box 3x3 filter for FP16 datatype */ -using NEBox3x3FP16Kernel = NEBox3x3Kernel; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEBOX3x3KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h deleted file mode 100644 index c4e1f3ec3a..0000000000 --- a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECANNYEDGEKERNEL_H -#define ARM_COMPUTE_NECANNYEDGEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Computes magnitude and quantised phase from inputs gradients. 
*/ -class NEGradientKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGradientKernel"; - } - /** Default constructor */ - NEGradientKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGradientKernel(const NEGradientKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGradientKernel &operator=(const NEGradientKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGradientKernel(NEGradientKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGradientKernel &operator=(NEGradientKernel &&) = default; - /** Default destructor */ - virtual ~NEGradientKernel() = default; - - /** Initialise the kernel's sources, destinations and border mode. - * - * @note gx, gy and magnitude must all be the same size (either 16 or 32) - * - * @param[in] gx Source tensor - Gx component. Data type supported: S16/S32. - * @param[in] gy Source tensor - Gy component. Data type supported: same as @p gx. - * @param[out] magnitude Destination tensor - Magnitude. Data type supported: U16 (if the data type of @p gx is S16) / U32 (if the data type of @p gx is S32). - * @param[out] phase Destination tensor - Quantized phase. Data type supported: U8. - * @param[in] norm_type Normalization type. If 1, L1-Norm otherwise L2-Norm - */ - virtual void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -protected: - /** Common signature for all the specialised gradient functions - * - * @param[in] gx_ptr Pointer to the first input tensor. - * @param[in] gy_ptr Pointer to the second input tensor. 
- * @param[out] magnitude_ptr Pointer to the first output tensor - * @param[out] phase_ptr Pointer to the second output tensor - */ - using GradientFunction = void(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr); - - GradientFunction *_func; /**< Gradient function to use for the particular tensor types passed to configure() */ - const ITensor *_gx; /**< Source tensor - Gx component */ - const ITensor *_gy; /**< Source tensor - Gy component */ - ITensor *_magnitude; /**< Destination tensor - Magnitude */ - ITensor *_phase; /**< Destination tensor - Quantized phase */ -}; - -/** NEON kernel to perform Non-Maxima suppression for Canny Edge. - * - * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input - * to characterize points as possible edges. Thus, at the end, each point will be set to EDGE, NO_EDGE or MAYBE. - * - * @note Hysteresis is computed in @ref NEEdgeTraceKernel - */ -class NEEdgeNonMaxSuppressionKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEEdgeNonMaxSuppressionKernel"; - } - /** Default constructor */ - NEEdgeNonMaxSuppressionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEEdgeNonMaxSuppressionKernel(const NEEdgeNonMaxSuppressionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEEdgeNonMaxSuppressionKernel &operator=(const NEEdgeNonMaxSuppressionKernel &) = delete; - /** Allow instances of this class to be moved */ - NEEdgeNonMaxSuppressionKernel(NEEdgeNonMaxSuppressionKernel &&) = default; - /** Allow instances of this class to be moved */ - NEEdgeNonMaxSuppressionKernel &operator=(NEEdgeNonMaxSuppressionKernel &&) = default; - /** Default destructor */ - ~NEEdgeNonMaxSuppressionKernel() = default; - - /** Initialise the kernel's sources, 
destination and border mode. - * - * @param[in] magnitude Source tensor - Magnitude. Data type supported: U16/U32. - * @param[in] phase Source tensor - Quantized phase. Data type supported: U8. - * @param[out] output Output tensor. Data type supported: U8. It will be filled with 0 for "no edge", 127 for "maybe", 255 for "edge" - * @param[in] upper_thr Upper threshold used for the hysteresis - * @param[in] lower_thr Lower threshold used for the hysteresis - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *magnitude, const ITensor *phase, ITensor *output, int32_t upper_thr, int32_t lower_thr, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Common signature for all the specialised non-maxima suppression functions - * - * @param[in] magnitude_ptr Pointer to the first input tensor. - * @param[in] phase_ptr Pointer to the second input tensor. 
- * @param[out] output_ptr Pointer to the output tensor - * @param[in] stride_mag Stride of the magnitude tensor - * @param[in] upper_thr Upper threshold used for the hysteresis - * @param[in] lower_thr Lower threshold used for the hysteresis - */ - using EdgeNonMaxSupprFunction = void(const void *__restrict magnitude_ptr, const void *__restrict phase_ptr, void *__restrict output_ptr, const uint32_t stride_mag, const int32_t upper_thr, - const int32_t lower_thr); - - EdgeNonMaxSupprFunction *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */ - const ITensor *_magnitude; /**< Source tensor - Magnitude */ - const ITensor *_phase; /**< Source tensor - Quantized phase */ - ITensor *_output; /**< Destination tensor */ - int32_t _lower_thr; /**< Lower threshold used for the hysteresis */ - int32_t _upper_thr; /**< Upper threshold used for the hysteresis */ -}; - -/** NEON kernel to perform Edge tracing */ -class NEEdgeTraceKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEEdgeTraceKernel"; - } - /** Default constructor */ - NEEdgeTraceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEEdgeTraceKernel(const NEEdgeTraceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEEdgeTraceKernel &operator=(const NEEdgeTraceKernel &) = delete; - /** Allow instances of this class to be moved */ - NEEdgeTraceKernel(NEEdgeTraceKernel &&) = default; - /** Allow instances of this class to be moved */ - NEEdgeTraceKernel &operator=(NEEdgeTraceKernel &&) = default; - /** Default constructor */ - ~NEEdgeTraceKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in,out] input Source tensor. Data type supported: U8. Must contain 0 for "no edge", 127 for "maybe", 255 for "edge" - * @param[in,out] output Destination tensor. 
Data type supported: U8. Must be initialized to 0 (No edge). - */ - void configure(ITensor *input, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - bool is_parallelisable() const override; - -private: - ITensor *_input; /**< Source tensor */ - ITensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NECANNYEDGEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h b/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h deleted file mode 100644 index 5d32aed573..0000000000 --- a/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H -#define ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#include -#include - -namespace arm_compute -{ -class IMultiImage; -class ITensor; -using IImage = ITensor; - -/** Interface for the channel combine kernel */ -class NEChannelCombineKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEChannelCombineKernel"; - } - /** Default constructor */ - NEChannelCombineKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEChannelCombineKernel(const NEChannelCombineKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEChannelCombineKernel &operator=(const NEChannelCombineKernel &) = delete; - /** Allow instances of this class to be moved */ - NEChannelCombineKernel(NEChannelCombineKernel &&) = default; - /** Allow instances of this class to be moved */ - NEChannelCombineKernel &operator=(NEChannelCombineKernel &&) = default; - /** Default destructor */ - ~NEChannelCombineKernel() = default; - - /** Configure function's inputs and outputs. - * - * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 - * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 - * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 - * @param[in] plane3 The 2D plane that forms channel 3. Data type supported: U8 - * @param[out] output The single planar output tensor. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 - */ - void configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output); - /** Configure function's inputs and outputs. - * - * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 - * @param[in] plane1 The 2D plane that forms channel 1. 
Data type supported: U8 - * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 - * @param[out] output The multi planar output tensor. Formats supported: NV12/NV21/IYUV/YUV444 - */ - void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - bool is_parallelisable() const override; - -private: - /** Combine 3 planes to form a three channel single plane tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void combine_3C(const Window &win); - /** Combine 4 planes to form a four channel single plane tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void combine_4C(const Window &win); - /** Combine 3 planes to form a single plane YUV tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - template - void combine_YUV_1p(const Window &win); - /** Combine 3 planes to form a two plane YUV tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void combine_YUV_2p(const Window &win); - /** Combine 3 planes to form a three plane YUV tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void combine_YUV_3p(const Window &win); - /** Copies a full plane to the output tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void copy_plane(const Window &win, uint32_t plane_id); - /** Common signature for all the specialised ChannelCombine functions - * - * @param[in] window Region on which to execute the kernel. 
- */ - using ChannelCombineFunction = void (NEChannelCombineKernel::*)(const Window &window); - /** ChannelCombine function to use for the particular tensor types passed to configure() */ - ChannelCombineFunction _func; - std::array _planes; - ITensor *_output; - IMultiImage *_output_multi; - std::array _x_subsampling; - std::array _y_subsampling; - unsigned int _num_elems_processed_per_iteration; - bool _is_parallelizable; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h b/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h deleted file mode 100644 index debae2488f..0000000000 --- a/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H -#define ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class IMultiImage; -class ITensor; -using IImage = ITensor; - -/** Interface for the channel extract kernel */ -class NEChannelExtractKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEChannelExtractKernel"; - } - /** Default constructor */ - NEChannelExtractKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEChannelExtractKernel(const NEChannelExtractKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEChannelExtractKernel &operator=(const NEChannelExtractKernel &) = delete; - /** Allow instances of this class to be moved */ - NEChannelExtractKernel(NEChannelExtractKernel &&) = default; - /** Allow instances of this class to be moved */ - NEChannelExtractKernel &operator=(NEChannelExtractKernel &&) = default; - /** Default destructor */ - ~NEChannelExtractKernel() = default; - - /** Set the input and output of the kernel - * - * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 - * @param[in] channel Channel to extract. - * @param[out] output Destination tensor. Format supported: U8 - */ - void configure(const ITensor *input, Channel channel, ITensor *output); - /** Set the input and output of the kernel - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444 - * @param[in] channel Channel to extract. - * @param[out] output Single-planar destination image. 
Format supported: U8 - */ - void configure(const IMultiImage *input, Channel channel, IImage *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Extract one channel from a two channel planar tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void extract_1C_from_2C_img(const Window &win); - /** Extract one channel from a three channel planar tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void extract_1C_from_3C_img(const Window &win); - /** Extract one channel from a four channel planar tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void extract_1C_from_4C_img(const Window &win); - /** Extract U/V channel from a single planar YUVY/UYVY tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void extract_YUYV_uv(const Window &win); - /** Copies a full plane to the output tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void copy_plane(const Window &win); - /** Common signature for all the specialised ChannelExtract functions - * - * @param[in] window Region on which to execute the kernel. - */ - using ChannelExtractFunction = void (NEChannelExtractKernel::*)(const Window &window); - /** ChannelExtract function to use for the particular tensor types passed to configure() */ - ChannelExtractFunction _func; - unsigned int _lut_index; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h b/arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h deleted file mode 100644 index e5bce7e273..0000000000 --- a/arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H -#define ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the channel shuffle kernel */ -class NEChannelShuffleLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEChannelShuffleLayerKernel"; - } - /** Default constructor */ - NEChannelShuffleLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEChannelShuffleLayerKernel(const NEChannelShuffleLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEChannelShuffleLayerKernel &operator=(const NEChannelShuffleLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEChannelShuffleLayerKernel(NEChannelShuffleLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEChannelShuffleLayerKernel &operator=(NEChannelShuffleLayerKernel &&) = default; - /** Default destructor */ - ~NEChannelShuffleLayerKernel() = default; - /** Configure function's inputs and outputs. - * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. Data type supported: Same as @p input - * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. - */ - void configure(const ITensor *input, ITensor *output, unsigned int num_groups); - /** Static function to check if given info will lead to a valid configuration of @ref NEChannelShuffleLayerKernel - * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. Data type supported: Same as @p input - * @param[in] num_groups Number of groups. 
Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - unsigned int _num_groups; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NECol2ImKernel.h b/arm_compute/core/NEON/kernels/NECol2ImKernel.h deleted file mode 100644 index e988771599..0000000000 --- a/arm_compute/core/NEON/kernels/NECol2ImKernel.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NECOL2IMKERNEL_H -#define ARM_COMPUTE_NECOL2IMKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#include "arm_compute/core/Size2D.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform col2im reshaping. - * - * Rearranges each matrix column into image blocks. It's the inverse operation of @ref NEIm2ColKernel. - * - * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3: - * - * @f[ - * \left( \begin{array}{ccccccccc} - * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccc} - * a0 & a1 & a2 \\ - * a3 & a4 & a5 \\ - * a6 & a7 & a8 \\ - * \end{array} \right) - * @f] - */ -class NECol2ImKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NECol2ImKernel"; - } - /** Default constructor */ - NECol2ImKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECol2ImKernel(const NECol2ImKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECol2ImKernel &operator=(const NECol2ImKernel &) = delete; - /** Allow instances of this class to be moved */ - NECol2ImKernel(NECol2ImKernel &&) = default; - /** Allow instances of this class to be moved */ - NECol2ImKernel &operator=(NECol2ImKernel &&) = default; - /** Default destructor */ - ~NECol2ImKernel() = default; - - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to convert. Data types supported: All - * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], - * while the rest represent batch of outputs. Data types supported: Same as @p input - * @param[in] convolved_dims Output convolved dimensions. 
- */ - void configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims); - /** Static function to check if given info will lead to a valid configuration of @ref NECol2ImKernel - * - * @param[in] input The input tensor to convert. Data types supported: All - * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], - * while the rest represent batch of outputs. Data types supported: Same as @p input - * @param[in] convolved_dims Output convolved dimensions. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the col2im - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void run_col2im(const Window &window); - - /** Common signature for all the specialised col2im functions - * - * @param[in] window Region on which to execute the kernel. - */ - using Col2ImFunctionPtr = void (NECol2ImKernel::*)(const Window &window); - - Col2ImFunctionPtr _func; - const ITensor *_input; - ITensor *_output; - Size2D _convolved_dims; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECOL2IMKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEColorConvertKernel.h b/arm_compute/core/NEON/kernels/NEColorConvertKernel.h deleted file mode 100644 index 88c03b7607..0000000000 --- a/arm_compute/core/NEON/kernels/NEColorConvertKernel.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_COLORCONVERTKERNEL_H -#define ARM_COMPUTE_COLORCONVERTKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class IMultiImage; -class ITensor; -using IImage = ITensor; - -/** Interface for the color convert kernel */ -class NEColorConvertKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEColorConvertKernel"; - } - /** Default constructor */ - NEColorConvertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEColorConvertKernel(const NEColorConvertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEColorConvertKernel &operator=(const NEColorConvertKernel &) = delete; - /** Allow instances of this class to be moved */ - NEColorConvertKernel(NEColorConvertKernel &&) = default; - /** Allow instances of this class to be moved */ - NEColorConvertKernel &operator=(NEColorConvertKernel &&) = default; - /** Default destructor */ - ~NEColorConvertKernel() = default; - - /** Set the input and output of the kernel - * - * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 - * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), - * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/), - * U8 (if the formats of @p input is RGB888) - */ - void configure(const ITensor *input, ITensor *output); - /** Set the input and output of the kernel - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888 - */ - void configure(const IMultiImage *input, IImage *output); - /** Set the input and output of the kernel - * - * @param[in] input Single-planar source image. 
Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 - * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888) - */ - void configure(const IImage *input, IMultiImage *output); - /** Set the input and output of the kernel - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) - */ - void configure(const IMultiImage *input, IMultiImage *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - using ColorConvertFunction = void(const void *__restrict input_ptr, void *__restrict output_ptr, const Window &win); - const void *_input; - void *_output; - ColorConvertFunction *_func; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECOLORCONVERTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h b/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h deleted file mode 100644 index dadf9e9b94..0000000000 --- a/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H -#define ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface to convert the 2D Fully Connected weights from NCHW to NHWC or vice versa. - * - * @note This function can be applied to the 2D weights used by a Fully Connected layer if: - * - It follows a Convolution layer - * - The data layout used by the network does not match the one the model has been trained in. 
- * - * @note This function assumes the weights are already reshaped (transposed) - */ -class NEConvertFullyConnectedWeightsKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEConvertFullyConnectedWeightsKernel"; - } - /** Default constructor */ - NEConvertFullyConnectedWeightsKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvertFullyConnectedWeightsKernel(const NEConvertFullyConnectedWeightsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvertFullyConnectedWeightsKernel &operator=(const NEConvertFullyConnectedWeightsKernel &) = delete; - /** Allow instances of this class to be moved */ - NEConvertFullyConnectedWeightsKernel(NEConvertFullyConnectedWeightsKernel &&) = default; - /** Allow instances of this class to be moved */ - NEConvertFullyConnectedWeightsKernel &operator=(NEConvertFullyConnectedWeightsKernel &&) = default; - /** Default destructor */ - ~NEConvertFullyConnectedWeightsKernel() = default; - /** Set the input and output tensor. - * - * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. - * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. - * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). - * @param[in] data_layout The data layout the weights have been trained in. - */ - void configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout); - /** Static function to check if given info will lead to a valid configuration of @ref NEConvertFullyConnectedWeightsKernel - * - * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All. - * @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input. 
- * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). - * @param[in] data_layout The data layout the weights have been trained in. - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the permute - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void run_convert_fc_weights(const Window &window); - - const ITensor *_input; - ITensor *_output; - unsigned int _factor1; /* equals to the number of elements per original input plane if @p data_layout == NCHW; its number of channels otherwise */ - unsigned int _factor2; /* equals to the number of elements per original input plane if @p data_layout == NHWC; its number of channels otherwise */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h b/arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h deleted file mode 100644 index 6c74a1216c..0000000000 --- a/arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H -#define ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** NEON kernel to convert asymmetric signed to asymmetric signed and vice-versa */ -class NEConvertQuantizedSignednessKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEConvertQuantizedSignednessKernel"; - } - /** Default constructor */ - NEConvertQuantizedSignednessKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ - NEConvertQuantizedSignednessKernel(const NEConvertQuantizedSignednessKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NEConvertQuantizedSignednessKernel &operator=(const NEConvertQuantizedSignednessKernel &) = delete; - /** Allow instances of this class to be moved */ - NEConvertQuantizedSignednessKernel(NEConvertQuantizedSignednessKernel &&) = default; - /** Allow instances of this class to be moved */ - NEConvertQuantizedSignednessKernel &operator=(NEConvertQuantizedSignednessKernel &&) = default; - /** Initialize the kernel's input, output. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Data types supported: opposite of @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NECopyKernel - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED. - * @param[in] output Destination tensor. Data types supported: opposite of @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEConvolutionKernel.h b/arm_compute/core/NEON/kernels/NEConvolutionKernel.h deleted file mode 100644 index 51a63335ff..0000000000 --- a/arm_compute/core/NEON/kernels/NEConvolutionKernel.h +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECONVOLUTIONKERNEL_H -#define ARM_COMPUTE_NECONVOLUTIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/NEON/INESimpleKernel.h" - -#include -#include -#include - -namespace arm_compute -{ -class ITensor; - -/****************************************************************************************\ - * Square Convolution * -\****************************************************************************************/ - -/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9). - * The client can supply a convolution matrix \f$ C_{m,n} \f$. 
- * @f{eqnarray}{ - * k_0 &=& \frac{m}{2} \\ - * l_0 &=& \frac{n}{2} \\ - * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l} - * @f} - * - * @note The above equation for this function is similar to the default OpenCV Filter2D function, - * which actually computes a correlation and not a convolution. - * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically. - */ -template -class NEConvolutionKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEConvolutionKernel"; - } - /** Default constructor */ - NEConvolutionKernel(); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - template - void convolution(const Window &win); - -protected: - uint32_t _scale; /**< scale of the convolution */ - std::array _convolution; /**< convolution matrix */ -}; - -/** Interface for the kernel which applied a 3x3 convolution to a tensor.*/ -using NEConvolution3x3Kernel = NEConvolutionKernel<3>; -/** Interface for the kernel which applied a 5x5 convolution to a tensor.*/ -using NEConvolution5x5Kernel = NEConvolutionKernel<5>; -/** Interface for the kernel which applied a 7x7 convolution to a tensor.*/ -using NEConvolution7x7Kernel = NEConvolutionKernel<7>; -///** Interface for the kernel which applied a 9x9 convolution to a tensor.*/ -using NEConvolution9x9Kernel = NEConvolutionKernel<9>; - -/****************************************************************************************\ - * Separable Square Convolution * -\****************************************************************************************/ - -/** Kernel for the Horizontal pass of a Separable Convolution */ -template -class NESeparableConvolutionHorKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NESeparableConvolutionHorKernel"; - } - /** Default constructor */ - NESeparableConvolutionHorKernel(); - - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data types supported: U16, S16, S32. - * @param[in] conv_row Convolution matrix to apply to the input tensor. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ITensor *input, ITensor *output, const int16_t *conv_row, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Apply the object's convolution to the given window of the input tensor.. - * - * @param[in] window Window to apply the convolution on. - */ - template - void convolve(const Window &window); - - std::array _conv_row; /**< Convolution coefficients */ - BorderSize _border_size; /**< Border size */ -}; - -/** Interface for the kernel which applied a 5x1 horizontal convolution to a tensor.*/ -using NESeparableConvolution5x5HorKernel = NESeparableConvolutionHorKernel<5>; -/** Interface for the kernel which applied a 7x1 horizontal convolution to a tensor.*/ -using NESeparableConvolution7x7HorKernel = NESeparableConvolutionHorKernel<7>; -/** Interface for the kernel which applied a 9x1 horizontal convolution to a tensor.*/ -using NESeparableConvolution9x9HorKernel = NESeparableConvolutionHorKernel<9>; - -/** Kernel for the Vertical pass of a Separable Convolution */ -template -class NESeparableConvolutionVertKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NESeparableConvolutionVertKernel"; - } - /** Default constructor */ - NESeparableConvolutionVertKernel(); - - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data type supported: U16, S16, S32. - * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv_col Convolution matrix to apply to the input tensor. - * @param[in] scale Scale of the convolution matrix - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ITensor *input, ITensor *output, const int16_t *conv_col, uint32_t scale, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Apply the object's convolution to the given window of the input tensor. - * This function is used if the intermediate values have been stored as U16. - * - * @param[in] win Window to apply the convolution on. - */ - template - void convolution_u16(const Window &win); - /** Apply the object's convolution to the given window of the input tensor. - * This function is used if the intermediate values have been stored as S16. - * - * @param[in] win Window to apply the convolution on. - */ - template - void convolution_s16(const Window &win); - /** Apply the object's convolution to the given window of the input tensor. - * This function is used if the intermediate values have been stored as S32. - * - * @param[in] win Window to apply the convolution on. 
- */ - template - void convolution_s32(const Window &win); - - std::array _conv_col; /**< Convolution coefficients */ - uint32_t _scale; /**< Convolution's scale */ -}; - -/** Interface for the kernel which applied a 1x5 vertical convolution to a tensor.*/ -using NESeparableConvolution5x5VertKernel = NESeparableConvolutionVertKernel<5>; -/** Interface for the kernel which applied a 1x7 vertical convolution to a tensor.*/ -using NESeparableConvolution7x7VertKernel = NESeparableConvolutionVertKernel<7>; -/** Interface for the kernel which applied a 1x9 vertical convolution to a tensor.*/ -using NESeparableConvolution9x9VertKernel = NESeparableConvolutionVertKernel<9>; - -/****************************************************************************************\ - * Rectangle Convolution * -\****************************************************************************************/ - -/** Kernel for the running convolution on a rectangle matrix. - * - * @note Supports combinations of 3,5,7 and 9. - */ -class NEConvolutionRectangleKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEConvolutionRectangleKernel"; - } - /** Default constructor */ - NEConvolutionRectangleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &) = delete; - /** Allow instances of this class to be moved */ - NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &&) = default; - /** Allow instances of this class to be moved */ - NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. 
- * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] width Width of convolution matrix (Number of columns) - * @param[in] height Height of convolution matrix (Number of rows) - * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - unsigned int get_index(uint32_t val); - /** Apply the object's convolution to the given window of the input tensor. - * - * @param[in] win Window to apply the convolution on. - */ - template - void convolution(const Window &win); - -protected: - const ITensor *_input; /**< Input tensor */ - ITensor *_output; /**< Output tensor */ - uint32_t _scale; /**< Scale of the convolution */ - std::vector _convolution; /**< Convolution matrix */ - BorderSize _border_size; /**< Calculated border width */ - uint32_t _func_idx; /**< Index used to specify convolution function to be used */ - const static unsigned int _nr_supported_sizes - { - 4 - }; /**< Number of supported permutations */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECONVOLUTIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NECopyKernel.h b/arm_compute/core/NEON/kernels/NECopyKernel.h deleted file mode 100644 index ddd14c18b8..0000000000 --- a/arm_compute/core/NEON/kernels/NECopyKernel.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECOPYKERNEL_H -#define ARM_COMPUTE_NECOPYKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a copy between two tensors */ -class NECopyKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NECopyKernel"; - } - /** Default constructor */ - NECopyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NECopyKernel(const NECopyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ - NECopyKernel &operator=(const NECopyKernel &) = delete; - /** Allow instances of this class to be moved */ - NECopyKernel(NECopyKernel &&) = default; - /** Allow instances of this class to be moved */ - NECopyKernel &operator=(NECopyKernel &&) = default; - /** Initialize the kernel's input, output. - * - * @param[in] input Source tensor. Data types supported: All - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] padding (Optional) Padding to be applied to the input tensor - */ - void configure(const ITensor *input, ITensor *output, const PaddingList &padding = PaddingList()); - /** Static function to check if given info will lead to a valid configuration of @ref NECopyKernel - * - * @param[in] input Source tensor. Data types supported: All - * @param[in] output Destination tensor. Data types supported: same as @p input. - * @param[in] padding (Optional) Padding to be applied to the input tensor - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding = PaddingList()); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - PaddingList _padding; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECOPYKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NECropKernel.h b/arm_compute/core/NEON/kernels/NECropKernel.h deleted file mode 100644 index b7e185f550..0000000000 --- a/arm_compute/core/NEON/kernels/NECropKernel.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEON_CROP_KERNEL_H -#define ARM_COMPUTE_NEON_CROP_KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the kernel to perform tensor cropping */ -class NECropKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NECropKernel"; - } - /** Default constructor */ - NECropKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECropKernel(const NECropKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECropKernel &operator=(const NECropKernel &) = delete; - /** Allow instances of this class to be moved */ - NECropKernel(NECropKernel &&) = default; - /** Allow instances of this class to be moved */ - NECropKernel &operator=(NECropKernel &&) = default; - /** Default destructor */ - ~NECropKernel() = default; - /** Configure kernel - * - * @note Supported tensor rank: up to 4 - * @note Padding not supported. - * - * @param[in] input Source tensor. Data type supported: U8/U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC. - * @param[in] crop_boxes Tensor containing all possible boxes used to crop the image, each represented by 4 normalized values. - * Data type supported: F32 - * @param[in] box_ind One dimensional tensor mapping the @p crop_box_ind to the index of the 3D image in @p input. - * Data type supported: F32 - * @param[out] output Destination tensor. Data type supported: F32 - * @param[in] crop_box_ind Index of the crop box to be used from @p crop_boxes. Default is 0. - * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. 
- */ - void configure(const ITensor *input, const ITensor *crop_boxes, const ITensor *box_ind, ITensor *output, uint32_t crop_box_ind = 0, float extrapolation_value = 0); - - /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel - * - * @note Supported tensor rank: up to 4 - * @note Padding not supported. - * - * @param[in] input Source tensor info. Data type supported: U8/U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC. - * @param[in] crop_boxes Tensor info for tensor containing all possible boxes used to crop the image. Data type supported: F32 - * @param[in] box_ind Tensor info for the one dimensional tensor mapping the @p crop_box_ind to the index of the 3D image - * in @p input. Data type supported: F32 - * @param[in] output Destination tensor. Data type supported: F32 - * @param[in] crop_box_ind Index of the crop box to be used from @p crop_boxes. Default is 0. - * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *crop_boxes, const ITensorInfo *box_ind, const ITensorInfo *output, uint32_t crop_box_ind = 0, float extrapolation_value = 0); - - /** Configure output tensor's shape as this can only be determined at runtime. */ - void configure_output_shape(); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - - /** Function to use for in bounds crop for the particular tensor types passed to configure() */ - using InBoundsCropFunction = void(const ITensor *, const ITensor *, float *, Coordinates, int32_t, int32_t, int32_t, bool, bool); - -private: - const ITensor *_input; - const ITensor *_crop_boxes; - const ITensor *_box_ind; - ITensor *_output; - - Coordinates _start; - Coordinates _end; - uint32_t _crop_box_ind; - float _extrapolation_value; - /** The number of rows out of bounds at the start and end of output. 
*/ - std::array _rows_out_of_bounds; - /** The number of columns out of bounds at the start and end of output. */ - std::array _cols_out_of_bounds; - - NECropKernel::InBoundsCropFunction *_in_bounds_crop_function; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEON_CROP_KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h b/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h deleted file mode 100644 index e4fe81a5d5..0000000000 --- a/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H -#define ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#include - -namespace arm_compute -{ -class IDistribution1D; -class ILut; -class ITensor; -using IImage = ITensor; - -/** Interface for the cumulative distribution (cummulative summmation) calculation kernel. - * - * This kernel calculates the cumulative sum of a given distribution (meaning that each output element - * is the sum of all its previous elements including itself) and creates a lookup table with the normalized - * pixel intensities which is used for improve the constrast of the image. - */ -class NECumulativeDistributionKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NECumulativeDistributionKernel"; - } - /** Default constructor */ - NECumulativeDistributionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECumulativeDistributionKernel(const NECumulativeDistributionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECumulativeDistributionKernel &operator=(const NECumulativeDistributionKernel &) = delete; - /** Allow instances of this class to be moved */ - NECumulativeDistributionKernel(NECumulativeDistributionKernel &&) = default; - /** Allow instances of this class to be moved */ - NECumulativeDistributionKernel &operator=(NECumulativeDistributionKernel &&) = default; - /** Set the input and output distribution. - * - * @param[in] input Input image. Data type supported: U8 - * @param[in] distribution Unnormalized 256-bin distribution of the input image. - * @param[out] cumulative_sum Cummulative distribution (Summed histogram). Should be same size as @p distribution. - * @param[out] output Equalization lookup table. Should consist of 256 entries of U8 elements. 
- */ - void configure(const IImage *input, const IDistribution1D *distribution, IDistribution1D *cumulative_sum, ILut *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - bool is_parallelisable() const override; - -private: - const IImage *_input; /**< Input image. */ - const IDistribution1D *_distribution; /**< Input histogram of the input image. */ - IDistribution1D *_cumulative_sum; /**< The cummulative distribution. */ - ILut *_output; /**< Output with the equalization lookup table. */ -private: - static const uint32_t _histogram_size = 256; /**< Default histogram size of 256. */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h deleted file mode 100644 index 3b2b9a1b79..0000000000 --- a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H -#define ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the depth concatenate kernel. - * The input tensor will be concatenated into the output tensor. - */ -class NEDepthConcatenateLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDepthConcatenateLayerKernel"; - } - /** Default constructor */ - NEDepthConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthConcatenateLayerKernel(const NEDepthConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthConcatenateLayerKernel &operator=(const NEDepthConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEDepthConcatenateLayerKernel(NEDepthConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEDepthConcatenateLayerKernel &operator=(NEDepthConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~NEDepthConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] depth_offset The offset on the Z axis. - * @param[in,out] output Output tensor info. Data types supported: Same as @p input. - * - * @note: The output tensor's low two dimensions can't be smaller than the input one's. 
- * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. - * - */ - void configure(const ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] depth_offset The offset on the Z axis. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; - -private: - using DepthConcatFunction = void(const ITensor *in, ITensor *out, unsigned int depth_offset, const Window &window); - -private: - DepthConcatFunction *_func; - unsigned int _depth_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h deleted file mode 100644 index e297fd7d1b..0000000000 --- a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_DEPTHCONVERTKERNEL_H -#define ARM_COMPUTE_DEPTHCONVERTKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Depth conversion kernel - * This function ignores the scale and zeroPoint of quanized tensors, i.e. QASYMM8 input is treated as uint8 values. 
- */ -class NEDepthConvertLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDepthConvertLayerKernel"; - } - /** Default constructor*/ - NEDepthConvertLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthConvertLayerKernel(const NEDepthConvertLayerKernel &) = delete; - /** Default move constructor */ - NEDepthConvertLayerKernel(NEDepthConvertLayerKernel &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthConvertLayerKernel &operator=(const NEDepthConvertLayerKernel &) = delete; - /** Default move assignment operator */ - NEDepthConvertLayerKernel &operator=(NEDepthConvertLayerKernel &&) = default; - /** Set the input and output of the kernel - * - * Valid conversions Input -> Output : - * - * - QASYMM8_SIGNED -> S16, S32, F32, F16 - * - QASYMM8 -> U16, S16, S32, F32, F16 - * - U8 -> U16, S16, S32, F32, F16 - * - U16 -> U8, U32 - * - S16 -> QASYMM8_SIGNED, U8, S32 - * - BFLOAT16 -> F32 - * - F16 -> QASYMM8_SIGNED, QASYMM8, F32, S32, U8 - * - S32 -> QASYMM8_SIGNED, QASYMM8, F16, F32, U8 - * - F32 -> QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8 - * - * @param[in] input The input tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/BFLOAT16/F16/F32. - * @param[out] output The output tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/BFLOAT16/F16/F32. - * @param[in] policy Conversion policy. - * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8. - */ - void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0); - /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConvertLayerKernel - * - * @param[in] input Source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/BFLOAT16/F16/F32. - * @param[in] output Destination tensor info. 
Data type supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/BFLOAT16/F16/F32. - * @param[in] policy Conversion policy - * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - ConvertPolicy _policy; - uint32_t _shift; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEDEPTHCONVERTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h deleted file mode 100644 index c497b2c858..0000000000 --- a/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H -#define ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the depth to space kernel */ -class NEDepthToSpaceLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDepthToSpaceLayerKernel"; - } - /** Default constructor */ - NEDepthToSpaceLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthToSpaceLayerKernel(const NEDepthToSpaceLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthToSpaceLayerKernel &operator=(const NEDepthToSpaceLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEDepthToSpaceLayerKernel(NEDepthToSpaceLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEDepthToSpaceLayerKernel &operator=(NEDepthToSpaceLayerKernel &&) = default; - /** Default destructor */ - ~NEDepthToSpaceLayerKernel() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All - * @param[out] output Tensor output. Data types supported: same as @p input - * @param[in] block_shape Block shape x value. - */ - void configure(const ITensor *input, ITensor *output, int32_t block_shape); - /** Static function to check if given info will lead to a valid configuration of @ref NEDepthToSpaceLayerKernel. - * - * @param[in] input Tensor input info. Supported tensor rank: 4. 
Data types supported: All - * @param[in] output Tensor output info. Data types supported: same as @p input - * @param[in] block_shape Block shape value. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; /**< Source tensor */ - ITensor *_output; /**< Destination tensor */ - int32_t _block_shape; /**< Block shape */ - DataLayout _data_layout; /**< Data layout of the operation */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h deleted file mode 100644 index eba1737a03..0000000000 --- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H -#define ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/utils/misc/Traits.h" -#include "support/Requires.h" - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -#include -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the kernel to run a depthwise convolution native on a tensor. */ -class NEDepthwiseConvolutionLayerNativeKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDepthwiseConvolutionLayerNativeKernel"; - } - /** Default constructor */ - NEDepthwiseConvolutionLayerNativeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthwiseConvolutionLayerNativeKernel(const NEDepthwiseConvolutionLayerNativeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthwiseConvolutionLayerNativeKernel &operator=(const NEDepthwiseConvolutionLayerNativeKernel &) = delete; - /** Default Move Constructor. */ - NEDepthwiseConvolutionLayerNativeKernel(NEDepthwiseConvolutionLayerNativeKernel &&) = default; - /** Default move assignment operator */ - NEDepthwiseConvolutionLayerNativeKernel &operator=(NEDepthwiseConvolutionLayerNativeKernel &&) = default; - /** Initialize the function's source, destination and parameters. - * - * @note Supported data layouts: NHWC - * - * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. 
This is a 3D tensor with dimensions [IFM, W, H]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * - */ - void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, - const Size2D &dilation = Size2D(1U, 1U)); - /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerNativeKernel - * - * @note Supported data layouts: NHWC - * - * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor info. This is a 3D tensor with dimensions [IFM, W, H]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[in] output Destination tensor info. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. 
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, - const Size2D &dilation = Size2D(1U, 1U)); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - template - using FloatEnalber = typename std::enable_if::value, int>::type; - - template = 0> - void run_depthwise(const Window &window, bool has_biases); - - template - using Quantized8bitEnalber = typename std::enable_if < std::is_same::value || std::is_same::value, int >::type; - - template = 0> - void run_depthwise(const Window &window, bool has_biases); - - /** Common signature for all the specialised depthwise convolution native functions - * - * @param[in] window Region on which to execute the kernel. - */ - using DepthwiseFunctionPtr = void (NEDepthwiseConvolutionLayerNativeKernel::*)(const Window &window, bool has_biases); - - DepthwiseFunctionPtr _func; - const ITensor *_input; - const ITensor *_weights; - const ITensor *_biases; - ITensor *_output; - PadStrideInfo _conv_info; - unsigned int _depth_multiplier; - Size2D _dilation; - std::vector _output_multiplier; - std::vector _output_shift; - bool _has_biases; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h deleted file mode 100644 index 7b97d06e43..0000000000 --- a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the dequantization layer kernel. */ -class NEDequantizationLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDequantizationLayerKernel"; - } - /** Default constructor */ - NEDequantizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDequantizationLayerKernel(const NEDequantizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDequantizationLayerKernel &operator=(const NEDequantizationLayerKernel &) = delete; - /** Default Move Constructor. 
*/ - NEDequantizationLayerKernel(NEDequantizationLayerKernel &&) = default; - /** Default move assignment operator */ - NEDequantizationLayerKernel &operator=(NEDequantizationLayerKernel &&) = default; - /** Default destructor */ - ~NEDequantizationLayerKernel() = default; - /** Set input, output tensors. - * - * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. - * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEDequantizationLayerKernel - * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. - * @param[in] output Output tensor info. Data types supported: F16/F32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDerivativeKernel.h b/arm_compute/core/NEON/kernels/NEDerivativeKernel.h deleted file mode 100644 index 7a46a4194e..0000000000 --- a/arm_compute/core/NEON/kernels/NEDerivativeKernel.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDERIVATIVEKERNEL_H -#define ARM_COMPUTE_NEDERIVATIVEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to run the derivative along the X/Y directions on a tensor. 
- * - */ -class NEDerivativeKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDerivativeKernel"; - } - /** Default constructor */ - NEDerivativeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDerivativeKernel(const NEDerivativeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDerivativeKernel &operator=(const NEDerivativeKernel &) = delete; - /** Allow instances of this class to be moved */ - NEDerivativeKernel(NEDerivativeKernel &&) = default; - /** Allow instances of this class to be moved */ - NEDerivativeKernel &operator=(NEDerivativeKernel &&) = default; - /** Initialise the kernel's sources, destination and border - * - * @note At least one of output_x or output_y must be set - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Function to perform derivative along the X direction on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void derivative_x(const Window &window); - /** Function to perform derivative along the Y direction on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void derivative_y(const Window &window); - /** Function to perform derivative along the X and Y direction on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void derivative_xy(const Window &window); - /** Common signature for all the specialised derivative functions - * - * @param[in] window Region on which to execute the kernel. - */ - using DerivativeFunction = void (NEDerivativeKernel::*)(const Window &window); - /** Derivative function to use for the particular tensor types passed to configure() */ - DerivativeFunction _func; - -private: - const ITensor *_input; /**< Input tensor */ - ITensor *_output_x; /**< Output tensor - Derivate along the X direction */ - ITensor *_output_y; /**< Output tensor - Derivate along the Y direction */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEDERIVATIVEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDilateKernel.h b/arm_compute/core/NEON/kernels/NEDilateKernel.h deleted file mode 100644 index 424cf549a1..0000000000 --- a/arm_compute/core/NEON/kernels/NEDilateKernel.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDILATEKERNEL_H -#define ARM_COMPUTE_NEDILATEKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform boolean image dilatation */ -class NEDilateKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEDilateKernel"; - } - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEDILATEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h deleted file mode 100644 index c927aff1eb..0000000000 --- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H -#define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON interface for Direct Convolution Layer kernel */ -class NEDirectConvolutionLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDirectConvolutionLayerKernel"; - } - /** Default constructor */ - NEDirectConvolutionLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDirectConvolutionLayerKernel(const NEDirectConvolutionLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDirectConvolutionLayerKernel &operator=(const NEDirectConvolutionLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEDirectConvolutionLayerKernel(NEDirectConvolutionLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEDirectConvolutionLayerKernel &operator=(NEDirectConvolutionLayerKernel &&) = default; - /** Default destructor */ - ~NEDirectConvolutionLayerKernel() = default; - /** Set the input, weights, and output tensors. - * - * @note: DirectConvolution only works in the following configurations: - * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 - * 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3 - * - * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * The 3rd dimension must be the same as the input's volume 3rd dimension. - * Data type supported:Same as @p input. - * @param[out] output Output tensor. 
- * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: F16/F32 - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - */ - void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info); - /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerKernel - * - * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * The 3rd dimension must be the same as the input's volume 3rd dimension. - * Data type supported:Same as @p input. - * @param[in] output Output tensor. - * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: F16/F32 - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /* Template function for optimized convolution NHWC */ - template - void convolve_nhwc_optimized(const Window &window); - - /* Template function for convolution NHWC */ - template - void convolve_nhwc(const Window &window); - - const ITensor *_input; - const ITensor *_weights; - ITensor *_output; - PadStrideInfo _conv_info; - BorderSize _border_size; - unsigned int _kernel_size; - unsigned int _num_weight_elems_read_per_row; - unsigned int _num_elems_read_per_iteration; - unsigned int _num_elems_written_per_iteration; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h deleted file mode 100644 index 552a88ce42..0000000000 --- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H -#define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H - -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; -/** NEON kernel to accumulate the biases, if provided, or downscale in case of quantized input. - * - * @note We assume bias to be shared - * @note For quantized computations (i.e. @p input of S32 type) the output data type for auto-initialization must be passed as part - * of the @ref DirectConvolutionLayerOutputStageKernelInfo. 
- */ -class NEDirectConvolutionLayerOutputStageKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDirectConvolutionLayerOutputStageKernel"; - } - /** Default constructor */ - NEDirectConvolutionLayerOutputStageKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDirectConvolutionLayerOutputStageKernel(const NEDirectConvolutionLayerOutputStageKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDirectConvolutionLayerOutputStageKernel &operator=(const NEDirectConvolutionLayerOutputStageKernel &) = delete; - /** Allow instances of this class to be moved */ - NEDirectConvolutionLayerOutputStageKernel(NEDirectConvolutionLayerOutputStageKernel &&) = default; - /** Allow instances of this class to be moved */ - NEDirectConvolutionLayerOutputStageKernel &operator=(NEDirectConvolutionLayerOutputStageKernel &&) = default; - /** Default destructor */ - ~NEDirectConvolutionLayerOutputStageKernel() = default; - /** Set the accumulate buffer and the biases of the kernel. - * - * @param[in, out] input Input to add the bias to. If @p output is not specified then accumulation is done in-place. - * Data type supported: F16/F32/S32 - * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input - * @param[out] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr) - * Note that in-place computation is only supported for F16/F32. For S32 this must not be nullptr. 
- * Data type supported: F16/F32 or QASYMM8/QASYMM8_SIGNED if @p input is S32 - * @param[in] info (Optional) DirectConvolutionLayerOutputStageKernel descriptor metadata - */ - void configure(ITensor *input, const ITensor *bias = nullptr, ITensor *output = nullptr, - const DirectConvolutionLayerOutputStageKernelInfo &info = DirectConvolutionLayerOutputStageKernelInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerOutputStageKernel - * - * @param[in] input Input to add the bias to. If @p output is not specified then accumulation is done in-place. - * Data type supported: F16/F32/S32 - * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input - * @param[in] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr) - * Note that in-place computation is only supported for F16/F32. For S32 this must not be nullptr. 
- * Data type supported: F16/F32 or QASYMM8/QASYMM8_SIGNED if @p input is S32 - * @param[in] info (Optional) DirectConvolutionLayerOutputStageKernel descriptor metadata - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias = nullptr, const ITensorInfo *output = nullptr, - const DirectConvolutionLayerOutputStageKernelInfo &info = DirectConvolutionLayerOutputStageKernelInfo()); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - using OutputStageKernel = void(ITensor *input, const ITensor *bias, const Window &window, ITensor *output, - int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias); - -private: - OutputStageKernel *_func; - ITensor *_input; - const ITensor *_bias; - ITensor *_output; - int _result_fixedpoint_multiplier; - int _result_shift; - int _result_offset_after_shift; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h deleted file mode 100644 index 7dae25c22c..0000000000 --- a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H -#define ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for an element-wise operation kernel - * - * Element-wise operation is computed by: - * @f[ output(x,y) = OP(input1(x,y), input2(x,y))@f] - * - */ -class NEElementwiseOperationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEElementwiseOperationKernel"; - } - /** Default constructor */ - NEElementwiseOperationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEElementwiseOperationKernel(const NEElementwiseOperationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEElementwiseOperationKernel &operator=(const NEElementwiseOperationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEElementwiseOperationKernel(NEElementwiseOperationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEElementwiseOperationKernel &operator=(NEElementwiseOperationKernel &&) = default; - /** Default destructor */ - ~NEElementwiseOperationKernel() = default; - - /** Common signature for all the specialised arithmetic functions - * - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: Dependent on subclass. - * @param[in] window Region on which to execute the kernel. 
- */ - using ElementwiseFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; - -protected: - /** Validate the argument passed to the kernel - * - * @param[in] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32. - * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. - * @param[in] output Output tensor. Data types supported: Dependent on subclass. - */ - static Status validate_arguments_common(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); - - /** Commmon configure function for element-wise operators with no additional options (e.g. Min, Max, SquaredDiff) - * - */ - void configure_common(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); - - /** Function to use for the particular tensor types passed to configure() */ - std::function _function; - - const ITensor *_input1; - const ITensor *_input2; - ITensor *_output; -}; - -class NEArithmeticOperationKernel : public NEElementwiseOperationKernel -{ -public: - /** Default constructor */ - NEArithmeticOperationKernel() = default; - - /** Configure kernel - * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: Same as @p input1. - */ - void configure(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); - - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel - * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input info. 
Data types supported: QASYMM8/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * - * @return a Status - */ - static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - -protected: - // Inherited methods overridden: - static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); -}; - -class NEDivisionOperationKernel : public NEArithmeticOperationKernel -{ -public: - /** Default constructor */ - NEDivisionOperationKernel() = default; - - /** Configure kernel - * - * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: Same as @p input1. - */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); - - /** Static function to check if given info will lead to a valid configuration of @ref NEDivisionOperationKernel - * - * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * - * @return a Status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - -protected: - // Inherited methods overridden: - static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); -}; - -class NEPowerOperationKernel : public NEArithmeticOperationKernel -{ -public: - /** Default constructor */ - NEPowerOperationKernel() = default; - - /** Configure kernel - * - * @param[in] input1 First tensor input info. 
Data types supported: F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: Same as @p input1. - */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); - - /** Static function to check if given info will lead to a valid configuration of @ref NEPowerOperationKernel - * - * @param[in] input1 First tensor input info. Data types supported: F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * - * @return a Status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - -protected: - // Inherited methods overridden: - static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); -}; - -class NEComparisonOperationKernel : public NEElementwiseOperationKernel -{ -public: - /** Default constructor */ - NEComparisonOperationKernel() = default; - - /** Configure kernel - * - * @param[in] op Comparison operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: U8. - */ - void configure(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); - - /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel - * - * @param[in] op Comparison operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. 
- * @param[in] output Output tensor info. Data types supported: U8. - * - * @return a Status - */ - static Status validate(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - -protected: - // Inherited methods overridden: - static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h b/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h deleted file mode 100644 index 7f9d7ad114..0000000000 --- a/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H -#define ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for an element-wise unary operation kernel - * - * Element-wise operation is computed by: - * @f[ output(x) = OP(input(x))@f] - * - */ -class NEElementwiseUnaryKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEElementwiseUnaryKernel"; - } - /** Default constructor */ - NEElementwiseUnaryKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEElementwiseUnaryKernel(const NEElementwiseUnaryKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEElementwiseUnaryKernel &operator=(const NEElementwiseUnaryKernel &) = delete; - /** Allow instances of this class to be moved */ - NEElementwiseUnaryKernel(NEElementwiseUnaryKernel &&) = default; - /** Allow instances of this class to be moved */ - NEElementwiseUnaryKernel &operator=(NEElementwiseUnaryKernel &&) = default; - /** Default destructor */ - ~NEElementwiseUnaryKernel() = default; - - /** Function to configure the @ref NEElementwiseUnaryKernel - * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input First tensor input. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations. - * @param[out] output Output tensor. Data types supported: Same as @p input. - */ - void configure(ElementWiseUnary op, const ITensor *input, ITensor *output); - - /** Static function to check if given info will lead to a valid configuration of @ref NEElementwiseUnaryKernel - * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input First tensor input info. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations. - * @param[in] output Output tensor info. 
Data types supported: Same as @p input. - * - * @return a Status - */ - static Status validate(ElementWiseUnary op, const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised arithmetic functions - * - * @param[in] window Region on which to execute the kernel. - */ - using ElementwiseUnaryPtr = void (NEElementwiseUnaryKernel::*)(const Window &window); - - /** Template function to run elementwise unary operation - * - * @tparam ScalarType Scalar datatype - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void elementwise_op(const Window &window); - - ElementwiseUnaryPtr _func; - const ITensor *_input; - ITensor *_output; - ElementWiseUnary _op; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEErodeKernel.h b/arm_compute/core/NEON/kernels/NEErodeKernel.h deleted file mode 100644 index 140481df17..0000000000 --- a/arm_compute/core/NEON/kernels/NEErodeKernel.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEERODEKERNEL_H -#define ARM_COMPUTE_NEERODEKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform boolean image erosion */ -class NEErodeKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEErodeKernel"; - } - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEERODEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h b/arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h deleted file mode 100644 index f7dc0b1d16..0000000000 --- a/arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H -#define ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H - -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the digit reverse operation kernel. 
*/ -class NEFFTDigitReverseKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFFTDigitReverseKernel"; - } - /** Constructor */ - NEFFTDigitReverseKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFFTDigitReverseKernel(const NEFFTDigitReverseKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFFTDigitReverseKernel &operator=(const NEFFTDigitReverseKernel &) = delete; - /** Default Move Constructor. */ - NEFFTDigitReverseKernel(NEFFTDigitReverseKernel &&) = default; - /** Default move assignment operator */ - NEFFTDigitReverseKernel &operator=(NEFFTDigitReverseKernel &&) = default; - /** Default destructor */ - ~NEFFTDigitReverseKernel() = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor). - * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: 2 (complex tensor). - * @param[in] idx Digit reverse index tensor. Data type supported: U32 - * @param[in] config Kernel configuration. - */ - void configure(const ITensor *input, ITensor *output, const ITensor *idx, const FFTDigitReverseKernelInfo &config); - - /** Static function to check if given info will lead to a valid configuration of @ref NEFFTDigitReverseKernel - * - * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor). - * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: 2 (complex tensor). - * @param[in] idx Digit reverse index tensor info. 
Data type supported: U32 - * @param[in] config Kernel configuration - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - using NEFFTDigitReverseKernelFunctionPtr = void (NEFFTDigitReverseKernel::*)(const Window &window); - - template - void digit_reverse_kernel_axis_0(const Window &window); - - template - void digit_reverse_kernel_axis_1(const Window &window); - - NEFFTDigitReverseKernelFunctionPtr _func; - const ITensor *_input; - ITensor *_output; - const ITensor *_idx; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h b/arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h deleted file mode 100644 index 15663e7490..0000000000 --- a/arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H -#define ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H - -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/NEON/INEKernel.h" - -#include -#include - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the FFT kernel. */ -class NEFFTRadixStageKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFFTRadixStageKernel"; - } - /** Constructor */ - NEFFTRadixStageKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFFTRadixStageKernel(const NEFFTRadixStageKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFFTRadixStageKernel &operator=(const NEFFTRadixStageKernel &) = delete; - /** Default Move Constructor. */ - NEFFTRadixStageKernel(NEFFTRadixStageKernel &&) = default; - /** Default move assignment operator */ - NEFFTRadixStageKernel &operator=(NEFFTRadixStageKernel &&) = default; - /** Default destructor */ - ~NEFFTRadixStageKernel() = default; - /** Set the input and output tensors. - * - * @note If the output tensor is nullptr, the FFT will be performed in-place - * - * @param[in,out] input Source tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor). - * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: same as @p input. - * @param[in] config FFT descriptor metadata. 
- */ - void configure(ITensor *input, ITensor *output, const FFTRadixStageKernelInfo &config); - /** Static function to check if given info will lead to a valid configuration of @ref NEFFTRadixStageKernel - * - * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor). - * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: same as @p input. - * @param[in] config FFT descriptor metadata. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelInfo &config); - /** Returns the radix that are support by the FFT kernel - * - * @return A set of supported radix - */ - static std::set supported_radix(); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - ITensor *_input; - ITensor *_output; - bool _run_in_place; - unsigned int _Nx; - unsigned int _axis; - unsigned int _radix; - - void set_radix_stage_axis0(const FFTRadixStageKernelInfo &config); - void set_radix_stage_axis1(const FFTRadixStageKernelInfo &config); - - using FFTFunctionPointerAxis0 = std::function; - using FFTFunctionPointerAxis1 = std::function; - - FFTFunctionPointerAxis0 _func_0; - FFTFunctionPointerAxis1 _func_1; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFFTScaleKernel.h b/arm_compute/core/NEON/kernels/NEFFTScaleKernel.h deleted file mode 100644 index c25ba323ab..0000000000 --- a/arm_compute/core/NEON/kernels/NEFFTScaleKernel.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEFFTSCALEKERNEL_H -#define ARM_COMPUTE_NEFFTSCALEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#include "arm_compute/core/KernelDescriptors.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the inverse fft scale kernel. */ -class NEFFTScaleKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFFTScaleKernel"; - } - /** Constructor */ - NEFFTScaleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFFTScaleKernel(const NEFFTScaleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFFTScaleKernel &operator=(const NEFFTScaleKernel &) = delete; - /** Default Move Constructor. 
*/ - NEFFTScaleKernel(NEFFTScaleKernel &&) = default; - /** Default move assignment operator */ - NEFFTScaleKernel &operator=(NEFFTScaleKernel &&) = default; - /** Default destructor */ - ~NEFFTScaleKernel() = default; - /** Set the input and output tensors. - * - * @param[in,out] input Source tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor). - * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: 1 (real tensor) or 2 (complex tensor). - * @param[in] config Kernel configuration - */ - void configure(ITensor *input, ITensor *output, const FFTScaleKernelInfo &config); - /** Static function to check if given info will lead to a valid configuration of @ref NEFFTScaleKernel - * - * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor). - * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: 1 (real tensor) or 2 (complex tensor). - * @param[in] config Kernel configuration - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTScaleKernelInfo &config); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - ITensor *_input; - ITensor *_output; - float _scale; - bool _run_in_place; - bool _is_conj; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEFFTSCALEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFastCornersKernel.h b/arm_compute/core/NEON/kernels/NEFastCornersKernel.h deleted file mode 100644 index e4e87c032f..0000000000 --- a/arm_compute/core/NEON/kernels/NEFastCornersKernel.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEFASTCORNERSKERNEL_H -#define ARM_COMPUTE_NEFASTCORNERSKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -#include - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** NEON kernel to perform fast corners */ -class NEFastCornersKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFastCornersKernel"; - } - /** Constructor */ - NEFastCornersKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFastCornersKernel(const NEFastCornersKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFastCornersKernel &operator=(const NEFastCornersKernel &) = delete; - /** Allow instances of this class to be moved */ - NEFastCornersKernel(NEFastCornersKernel &&) = default; - /** Allow instances of this class to be moved */ - NEFastCornersKernel &operator=(NEFastCornersKernel &&) = default; - /** Initialise the kernel. - * - * @param[in] input Source image. Data type supported: U8. - * @param[out] output Output image. Data type supported: U8. - * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. - * @param[in] non_max_suppression True if non-maxima suppresion is applied, false otherwise. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const IImage *input, IImage *output, uint8_t threshold, bool non_max_suppression, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - const IImage *_input; /**< source image */ - IImage *_output; /**< inermediate results */ - uint8_t _threshold; /**< threshold on difference between intensity */ - bool _non_max_suppression; /** true if non-maxima suppression is applied in the next stage */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEFASTCORNERSKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFillArrayKernel.h b/arm_compute/core/NEON/kernels/NEFillArrayKernel.h deleted file mode 100644 index 99df8795ae..0000000000 --- a/arm_compute/core/NEON/kernels/NEFillArrayKernel.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEFILLARRAYKERNEL_H -#define ARM_COMPUTE_NEFILLARRAYKERNEL_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -#include - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** This kernel adds all texels greater than or equal to the threshold value to the keypoint array. */ -class NEFillArrayKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFillArrayKernel"; - } - /** Default contructor */ - NEFillArrayKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFillArrayKernel(const NEFillArrayKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFillArrayKernel &operator=(const NEFillArrayKernel &) = delete; - /** Allow instances of this class to be moved */ - NEFillArrayKernel(NEFillArrayKernel &&) = default; - /** Allow instances of this class to be moved */ - NEFillArrayKernel &operator=(NEFillArrayKernel &&) = default; - /** Default detructor */ - ~NEFillArrayKernel() = default; - - /** Initialise the kernel. - * - * @param[in] input Source image. Data type supported: U8. - * @param[in] threshold Texels greater than the threshold will be added to the array. - * @param[out] output Arrays of keypoints to store the results. 
- */ - void configure(const IImage *input, uint8_t threshold, IKeyPointArray *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - bool is_parallelisable() const override; - -private: - const IImage *_input; - IKeyPointArray *_output; - uint8_t _threshold; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEFILLARRAYKERNEL_H*/ diff --git a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h deleted file mode 100644 index 071843d114..0000000000 --- a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEFILLBORDERKERNEL_H -#define ARM_COMPUTE_NEFILLBORDERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the kernel to fill borders */ -class NEFillBorderKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFillBorderKernel"; - } - /** Default Constructor */ - NEFillBorderKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFillBorderKernel(const NEFillBorderKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFillBorderKernel &operator=(const NEFillBorderKernel &) = delete; - /** Allow instances of this class to be moved */ - NEFillBorderKernel(NEFillBorderKernel &&) = default; - /** Allow instances of this class to be moved */ - NEFillBorderKernel &operator=(NEFillBorderKernel &&) = default; - /** Default destructor */ - ~NEFillBorderKernel() = default; - - /** Initialise the function. - * - * @note This kernel fills the borders within the XY-planes. - * - * @param[in,out] tensor Tensor to process. Data types supported: All. - * @param[in] border_size Size of the border to fill in elements. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
- * - */ - void configure(ITensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - void fill_replicate_single_channel(const Window &window); - void fill_constant_value_single_channel(const Window &window); - - ITensor *_tensor; - BorderSize _border_size; - BorderMode _mode; - PixelValue _constant_border_value; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEFILLBORDERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h b/arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h deleted file mode 100644 index dbd24129f1..0000000000 --- a/arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEFLATTENLAYERKERNEL_H -#define ARM_COMPUTE_NEFLATTENLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the flatten layer kernel. */ -class NEFlattenLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFlattenLayerKernel"; - } - /** Default constructor */ - NEFlattenLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFlattenLayerKernel(const NEFlattenLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFlattenLayerKernel &operator=(const NEFlattenLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEFlattenLayerKernel(NEFlattenLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEFlattenLayerKernel &operator=(NEFlattenLayerKernel &&) = default; - /** Default destructor */ - ~NEFlattenLayerKernel() = default; - - /** Set the input and output of the kernel. - * - * @param[in] input First input tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: All - * @param[out] output Output tensor with shape [w*h*d, input_batches] where: - * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEFlattenLayerKernel - * - * @param[in] input First input tensor to flatten with at least 3 dimensions. 
- * The dimensions above the third will be interpreted as batches. Data types supported: All - * @param[out] output Output tensor with shape [w*h*d, input_batches] where: - * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEFLATTENLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFloorKernel.h b/arm_compute/core/NEON/kernels/NEFloorKernel.h deleted file mode 100644 index 255b0d4fb9..0000000000 --- a/arm_compute/core/NEON/kernels/NEFloorKernel.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEFLOORKERNEL_H -#define ARM_COMPUTE_NEFLOORKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a floor operation */ -class NEFloorKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEFloorKernel"; - } - /** Set the source, destination of the kernel - * - * @param[in] input Source tensor. Data type supported: F16/F32. - * @param[out] output Destination tensor. Same as @p input - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEFloorKernel - * - * @param[in] input Source tensor info. Data type supported: F16/F32. - * @param[in] output Destination tensor info. Same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEFLOORKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h b/arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h deleted file mode 100644 index ecb17f87a2..0000000000 --- a/arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H -#define ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** OpenNE kernel to fuse the batch normalization node to a preceding convolution node */ -class NEFuseBatchNormalizationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFuseBatchNormalizationKernel"; - } - /** Default constructor */ - NEFuseBatchNormalizationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFuseBatchNormalizationKernel(const NEFuseBatchNormalizationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFuseBatchNormalizationKernel &operator=(const NEFuseBatchNormalizationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEFuseBatchNormalizationKernel(NEFuseBatchNormalizationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEFuseBatchNormalizationKernel &operator=(NEFuseBatchNormalizationKernel &&) = default; - /** Default destructor */ - ~NEFuseBatchNormalizationKernel() = default; - /** Set the source, destination of the kernel - * - * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC - * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights - * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights - * @param[out] fused_weights (Optional) Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights - * @param[out] fused_bias (Optional) Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. 
Same as @p input_weights - * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights - * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights - * @note if nullptr, bn_beta is set to 0.0 - * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights - * @note if nullptr, bn_gamma is set to 1.0 - * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. - * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. - */ - void configure(const ITensor *input_weights, const ITensor *bn_mean, const ITensor *bn_var, ITensor *fused_weights, ITensor *fused_bias, - const ITensor *input_bias = nullptr, const ITensor *bn_beta = nullptr, const ITensor *bn_gamma = nullptr, - float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); - /** Static function to check if given info will lead to a valid configuration of @ref NEFuseBatchNormalizationKernel - * - * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC - * @param[in] bn_mean Batch normalization layer mean tensor info. Same as @p input_weights - * @param[in] bn_var Batch normalization layer variance tensor info. Same as @p input_weights - * @param[in] fused_weights (Optional) Output fused weights tensor info. It can be a nullptr in case of in-place computation. Same as @p input_weights - * @param[in] fused_bias (Optional) Output fused bias tensor info. It can be a nullptr in case of in-place computation and input_bias != nullptr. 
Same as @p input_weights - * @param[in] input_bias (Optional) Input bias tensor info for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights - * @param[in] bn_beta (Optional) Batch normalization layer beta tensor info. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights - * @note if nullptr, bn_beta is set to 0.0 - * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor info. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights - * @note if nullptr, bn_gamma is set to 1.0 - * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. - * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. - * - * @return a status - */ - static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var, - const ITensorInfo *fused_weights, const ITensorInfo *fused_bias, - const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr, - float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input_weights; - const ITensor *_input_bias; - const ITensor *_bn_mean; - const ITensor *_bn_var; - const ITensor *_bn_gamma; - const ITensor *_bn_beta; - ITensor *_fused_weights; - ITensor *_fused_bias; - float _epsilon; - bool _run_in_place_weights; - bool _run_in_place_bias; - - using FuseBatchNormFunction = void(const ITensor *input_weights, const ITensor *input_bias, ITensor *fused_weights, ITensor *fused_bias, - const ITensor *bn_mean, const ITensor *bn_var, const ITensor *bn_beta, const ITensor *bn_gamma, float epsilon, const Window &window); - - FuseBatchNormFunction *_func; -}; -} 
// namespace arm_compute -#endif /*ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h b/arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h deleted file mode 100644 index a2f0e8c5a8..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMASSEMBLYBASE_H -#define ARM_COMPUTE_NEGEMMASSEMBLYBASE_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Base class for GEMM NEON kernels implemented in Assembly. 
*/ -class NEGEMMAssemblyBaseKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMAssemblyBaseKernel"; - } - /** Constructor */ - NEGEMMAssemblyBaseKernel() - : _input0(nullptr), _input1(nullptr), _output(nullptr), _workspace(nullptr), _alpha(1.f), _beta(0.f), _is_transposed_0(false), _is_transposed_1(false) - { - } - - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMAssemblyBaseKernel(const NEGEMMAssemblyBaseKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMAssemblyBaseKernel &operator=(const NEGEMMAssemblyBaseKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMAssemblyBaseKernel(NEGEMMAssemblyBaseKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMAssemblyBaseKernel &operator=(NEGEMMAssemblyBaseKernel &&) = default; - - virtual ~NEGEMMAssemblyBaseKernel() = default; - - /** Initialise the kernel's input and output. - * - * The computed function is C = a * AxB + b * C. - * - * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F32 - * @param[in] input1 Input tensor containing the Matrix B. Data types supported: same as @p input0 - * @param[in,out] output Output tensor to store the result of matrix multiplication. If @p beta is not zero the values are multiplied by @p beta before the result is accumulated. Otherwise the values are overwritten by the result. Data types supported: same as @p input0. - * @param[out] workspace Space for intermediate results. - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of the accumulation. - * @param[in] is_transposed_0 (Optional)True if @p input0 is transposed else false. (Defaults to false) - * @param[in] is_transposed_1 (Optional)True if @p input1 is transposed else false. 
(Defaults to false) - */ - void configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha = 1.f, float beta = 0.f, bool is_transposed_0 = false, bool is_transposed_1 = false) - { - internal_configure(input0, input1, output, workspace, alpha, beta, is_transposed_0, is_transposed_1); - } - -protected: - virtual void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool _is_transposed_0, bool _is_transposed_1) = 0; - - const ITensor *_input0; - const ITensor *_input1; - ITensor *_output; - ITensor *_workspace; - float _alpha; - float _beta; - bool _is_transposed_0; - bool _is_transposed_1; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMASSEMBLYBASE_H*/ diff --git a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h deleted file mode 100644 index 322932bab2..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H -#define ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to interleave the elements of a matrix - * - * This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values) - * - * @f[ - * \left( \begin{array}{cccc} - * a00 & a01 & a02 & a03 \\ - * a10 & a11 & a12 & a13 \\ - * a20 & a21 & a22 & a23 \\ - * a30 & a31 & a32 & a33 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccccccccccccccccc} - * a00 & a10 & a20 & a30 & a01 & a11 & a21 & a31 & a02 & a12 & a22 & a32 & a03 & a13 & a23 & a33 \\ - * \end{array} \right) - * @f] - * - * After this operation, the output matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ] - */ -class NEGEMMInterleave4x4Kernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGEMMInterleave4x4Kernel"; - } - /* Constructor */ - NEGEMMInterleave4x4Kernel(); - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor which stores the interleaved matrix. Data type supported: same as @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMInterleave4x4Kernel - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run gemm interleave 4x4 - * - * @tparam ScalarType Scalar datatype - * - * @param[in] input Input tensor. Data types supported: uint32_t, uint16_t and uint8_t - * @param[out] output Output tensor. Data types supported: uint32_t, uint16_t and uint8_t - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template <typename ScalarType> - void gemm_interleave4x4(const ITensor *input, ITensor *output, const Window &window); - - /** Common signature for all the specialised gemm interleave 4x4 functions - * - * @param[in] input Input tensor. Data types supported: uint32_t, uint16_t and uint8_t - * @param[out] output Output tensor. Data types supported: uint32_t, uint16_t and uint8_t - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - using GEMMInterleaveFunctionFuncPtr = void (NEGEMMInterleave4x4Kernel::*)(const ITensor *input, ITensor *output, const Window &window); - - GEMMInterleaveFunctionFuncPtr _func; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H*/ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h deleted file mode 100644 index 856cdf42e7..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to multiply matrices - * - * @note @ref NEGEMMLowpMatrixMultiplyKernel low precision matrix product kernel - * This kernel performs the following computation: - * - * -# Convert a values from int8 to int32 - * -# Convert b values from int8 to int32 - * -# Compute the int32 matrix product of the resulting a * b and store the result as int32 - * - */ -class NEGEMMLowpMatrixMultiplyKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpMatrixMultiplyKernel"; - } - /** Constructor */ - NEGEMMLowpMatrixMultiplyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpMatrixMultiplyKernel(const NEGEMMLowpMatrixMultiplyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpMatrixMultiplyKernel &operator=(const NEGEMMLowpMatrixMultiplyKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMLowpMatrixMultiplyKernel(NEGEMMLowpMatrixMultiplyKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMLowpMatrixMultiplyKernel &operator=(NEGEMMLowpMatrixMultiplyKernel &&) = default; - /** Initialise the kernel's input and output. - * - * The input matrices @p input0 and @p input1 must be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel. These two - * kernels change the layout of the original matrices to be more cache-friendly. - * - * @param[in] input0 Input tensor containing the interleaved Matrix A. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED - * @param[in] input1 Input tensor containing the transposed1xW Matrix B. 
Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 - */ - void configure(const ITensor *input0, const ITensor *input1, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixMultiplyKernel - * - * @param[in] input0 Input tensor info containing the interleaved Matrix A. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED - * @param[in] input1 Input tensor info containing the transposed Matrix B. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL - * @param[in] output Output tensor info to store the result of matrix multiplication. Data type supported: S32 - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input0; - const ITensor *_input1; - ITensor *_output; - bool _slide_matrix_b; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H*/ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h deleted file mode 100644 index 5ce8403d3b..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel used to add the offset contribution after @ref NEGEMMLowpMatrixMultiplyKernel. The computation is performed in-place - * - * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), - * and adds to it the offset contribution of matrix A and matrix B in-place. 
- * - * The final result is: - * - * mm_result[i][k] = mm_result[i][k] + - * (vector_sum_col[k] * a_offset) + - * (vector_sum_row[i] * b_offset) + - * (a_offset * b_offset * k) - * - */ -class NEGEMMLowpOffsetContributionKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpOffsetContributionKernel"; - } - /** Constructor */ - NEGEMMLowpOffsetContributionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpOffsetContributionKernel(const NEGEMMLowpOffsetContributionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpOffsetContributionKernel &operator=(const NEGEMMLowpOffsetContributionKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMLowpOffsetContributionKernel(NEGEMMLowpOffsetContributionKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMLowpOffsetContributionKernel &operator=(NEGEMMLowpOffsetContributionKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in, out] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32 - * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result - * @param[in] k Number of matrix A columns or Matrix B rows - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. 
- */ - void configure(ITensor *mm_result, const ITensor *vector_sum_col, const ITensor *vector_sum_row, int32_t k, int32_t a_offset, int32_t b_offset); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpOffsetContributionKernel - * - * @param[in] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32 - * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. - * - * @return a status - */ - static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, int32_t a_offset, int32_t b_offset); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_vector_sum_col; - const ITensor *_vector_sum_row; - ITensor *_mm_result; - int32_t _a_offset; - int32_t _b_offset; - int32_t _k_offset; - bool _slide_vector_sum_col; -}; -} // namespace arm_compute - -#endif /* ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h deleted file mode 100644 index 4db0872166..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel used to add the offset contribution and perform the output stage after @ref NEGEMMLowpMatrixMultiplyKernel. - * - * The computation is performed in-place - * - * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), - * and adds to it the offset contribution of matrix A and matrix B in-place. - * - * The output stage can perform either QuantizeDownInt32ToUint8Scale or QuantizeDownInt32ToUint8ScaleByFixedPoint for Uint8. - * The output stage can perform either QuantizeDownInt32ToInt8Scale or QuantizeDownInt32ToInt8ScaleByFixedPoint for Int8. 
- * - * For QuantizeDownInt32ToUint8Scale/QuantizeDownInt32ToInt8Scale the final result is: - * - * ((mm_result'[i][k] + result_offset) * result_mult_int) >> result_shift - * - * For QuantizeDownInt32ToUint8ScaleByFixedPoint/QuantizeDownInt32ToInt8ScaleByFixedPoint the final result is: - * - * (FixedPointMul(mm_result'[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift - * - * where FixedPointMul(x, y) is the nearest integer to the following - * mathematical expression, evaluated without overflow or intermediate rounding: - * - * (x * y) / 2^31 - * - * and mm_result'[i][k] = mm_result[i][k] + - * (vector_sum_col[k] * a_offset) + - * (vector_sum_row[i] * b_offset) + - * (a_offset * b_offset * k) - */ - -class NEGEMMLowpOffsetContributionOutputStageKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpOffsetContributionOutputStageKernel"; - } - /** Constructor */ - NEGEMMLowpOffsetContributionOutputStageKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpOffsetContributionOutputStageKernel(const NEGEMMLowpOffsetContributionOutputStageKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpOffsetContributionOutputStageKernel &operator=(const NEGEMMLowpOffsetContributionOutputStageKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMLowpOffsetContributionOutputStageKernel(NEGEMMLowpOffsetContributionOutputStageKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMLowpOffsetContributionOutputStageKernel &operator=(NEGEMMLowpOffsetContributionOutputStageKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. 
Data type supported: S32 - * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p mm_result. - * @param[out] output Output tensor containing the final quantized result. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] k Number of matrix A columns or Matrix B rows - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. - * @param[in] output_stage GEMMLowp output stage info, providing the type of quantization and the necessary parameters. - */ - void configure(const ITensor *mm_result, const ITensor *vector_sum_col, const ITensor *vector_sum_row, const ITensor *bias, ITensor *output, int32_t k, int32_t a_offset, int32_t b_offset, - GEMMLowpOutputStageInfo output_stage); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpOffsetContributionOutputStageKernel - * - * @param[in] mm_result Input tensor info containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32 - * @param[in] vector_sum_col Tensor info for the input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Tensor info for the input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. 
Data type supported: same as @p mm_result - * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p mm_result. - * @param[in] output Output tensor info containing the final quantized result. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. - * @param[in] output_stage GEMMLowp output stage info, providing the type of quantization and the necessary parameters. - * - * @return a status - */ - static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, const ITensorInfo *output, int32_t a_offset, - int32_t b_offset, - GEMMLowpOutputStageInfo output_stage); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Function to use for the particular tensors passed to configure() */ - const ITensor *_vector_sum_col; - const ITensor *_vector_sum_row; - const ITensor *_bias; - const ITensor *_mm_result; - ITensor *_output; - int32_t _a_offset; - int32_t _b_offset; - int32_t _k_offset; - bool _slide_vector_sum_col; - GEMMLowpOutputStageInfo _output_stage; -}; -} // namespace arm_compute - -#endif /* ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h deleted file mode 100644 index 4e0c8f8fb8..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED - * - * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value. 
- * The following computations will be performed by the kernel: - * - * -# Add offset terms to final result - * -# Multiply each entry of result by result_mult_int - * -# Add bias to final result if bias tensor is not a nullptr - * -# Shift the int32 accumulator by result_shift - * -# Clamp the value between the specified min and max bounds - * -# Clamp the resulting int32 values: - * -# -to the [0..255] range and cast to QASYMM8. - * -# -to the [-128..127] range and cast to QASYMM8_SIGNED. - * - */ -class NEGEMMLowpQuantizeDownInt32ScaleKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpQuantizeDownInt32ScaleKernel"; - } - /** Constructor */ - NEGEMMLowpQuantizeDownInt32ScaleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ScaleKernel(const NEGEMMLowpQuantizeDownInt32ScaleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ScaleKernel &operator=(const NEGEMMLowpQuantizeDownInt32ScaleKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMLowpQuantizeDownInt32ScaleKernel(NEGEMMLowpQuantizeDownInt32ScaleKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMLowpQuantizeDownInt32ScaleKernel &operator=(NEGEMMLowpQuantizeDownInt32ScaleKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[out] output_stage GEMMLowp output stage metadata. 
- */ - void configure(const ITensor *input, const ITensor *bias, ITensor *output, const GEMMLowpOutputStageInfo *output_stage); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ScaleKernel - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[out] output_stage GEMMLowp output stage metadata. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the NEGEMMLowpQuantizeDownInt32ScaleKernel - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template <typename T> - void run(const Window &window); - - /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ScaleKernel functions - * - * @param[in] window Region on which to execute the kernel.
- */ - using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ScaleKernel::*)(const Window &window); - - QuantizeDownFunctionPtr _func; - const ITensor *_input; - const ITensor *_bias; - ITensor *_output; - const GEMMLowpOutputStageInfo *_output_stage; - bool _is_bounded_relu; -}; -} // namespace arm_compute - -#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h deleted file mode 100644 index d26c778e74..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QSYMM16 - * - * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QSYMM16 value. - * The following computations will be performed by the kernel: - * - * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier - * -# Add bias to final result if bias tensor is not a nullptr - * -# Round to nearest division by a power-of-two using result_shift - * -# Clamp the value between the specified min and max bounds - * -# Clamp the resulting int32 values to the [-32768, 32767] range and cast to QSYMM16. - * - */ -class NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel"; - } - /** Constructor */ - NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &&) = default; - /** Allow instances of this class to be moved */ - 
NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QSYMM16 - * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add - * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0. - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16. - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0. - */ - void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel - * - * @param[in] input Input tensor info. Data type supported: S32 - * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor info with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor info. 
Data type supported: Data type supported: QSYMM16 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0. - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void run(const Window &window); - - /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel functions - * - * @param[in] window Region on which to execute the kernel. - */ - using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::*)(const Window &window); - - QuantizeDownFunctionPtr _func; - const ITensor *_input; - const ITensor *_bias; - ITensor *_output; - int _result_fixedpoint_multiplier; - int _result_shift; - int _min; - int _max; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h deleted file mode 100644 index f1661680d0..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8_SIGNED - * - * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8_SIGNED value. 
- * The following computations will be performed by the kernel: - * - * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier - * -# Add bias to final result if bias tensor is not a nullptr - * -# Round to nearest division by a power-of-two using result_shift - * -# Add offset to each result - * -# Clamp the value between the specified min and max bounds - * -# Clamp the resulting int32 values to the [-128..127] range and cast to QASYMM8_SIGNED. - * - */ -class NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel"; - } - /** Constructor */ - NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. 
Data type supported: Data type supported: QASYMM8_SIGNED - * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add - * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication - * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions - */ - void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor. 
Data type supported: Data type supported: QASYMM8_SIGNED - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void run(const Window &window); - - /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel functions - * - * @param[in] window Region on which to execute the kernel. 
- */ - using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::*)(const Window &window); - - QuantizeDownFunctionPtr _func; - const ITensor *_input; - const ITensor *_bias; - ITensor *_output; - int _result_fixedpoint_multiplier; - int _result_shift; - int _result_offset_after_shift; - int _min; - int _max; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h deleted file mode 100644 index 94ca617466..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8 - * - * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8 value. - * The following computations will be performed by the kernel: - * - * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier - * -# Add bias to final result if bias tensor is not a nullptr - * -# Round to nearest division by a power-of-two using result_shift - * -# Add offset to each result - * -# Clamp the value between the specified min and max bounds - * -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8. 
- * - */ -class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel"; - } - /** Constructor */ - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. 
Data type supported: Data type supported: QASYMM8 - * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add - * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication - * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8 - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions - */ - void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor. 
Data type supported: Data type supported: QASYMM8 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8 - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void run(const Window &window); - - /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel functions - * - * @param[in] window Region on which to execute the kernel. - */ - using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::*)(const Window &window); - - QuantizeDownFunctionPtr _func; - const ITensor *_input; - const ITensor *_bias; - ITensor *_output; - int _result_fixedpoint_multiplier; - int _result_shift; - int _result_offset_after_shift; - int _min; - int _max; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h deleted file mode 100644 index f41941f796..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; -struct GEMMLowpReductionKernelInfo; - -/** Common interface for all NEON reduction kernels */ -class INEGEMMLowpReductionKernel : public INEKernel -{ -public: - /** Constructor */ - INEGEMMLowpReductionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - INEGEMMLowpReductionKernel(const INEGEMMLowpReductionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - INEGEMMLowpReductionKernel &operator=(const INEGEMMLowpReductionKernel &) = delete; - /** Allow instances of this class to be moved */ - INEGEMMLowpReductionKernel(INEGEMMLowpReductionKernel &&) = default; - /** Allow instances of this class to be moved */ - INEGEMMLowpReductionKernel &operator=(INEGEMMLowpReductionKernel &&) = default; - - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL - * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k Number of matrix columns/rows depending on the type of reduction. - * - is_reshaped True if the matrix has been reshaped. - * - scalar Scalar value to multiply each reduced column/row by. - * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. - */ - virtual void configure(const ITensor *input, ITensor *output, const GEMMLowpReductionKernelInfo &info) = 0; - -protected: - const ITensor *_input; - ITensor *_output; - int32_t _k; - int32_t _scalar; - bool _mul_by_scalar; -}; - -/** NEON kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A. 
- * - * @note This stage is needed to handle the offset of matrix product - * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md - */ -class NEGEMMLowpMatrixAReductionKernel : public INEGEMMLowpReductionKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpMatrixAReductionKernel"; - } - /** Initialise the kernel's input and output. - * - * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL - * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k (num_mtx_a_cols) Number of matrix A columns - * - is_reshaped (is_interleaved4x4) True if the matrix A has been interleaved4x4 - * - scalar Scalar value to multiply each reduced row by. - * - mul_byscalar True if each reduced column must be multiplied by a scalar value. - */ - void configure(const ITensor *mtx_a, ITensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override; - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixAReductionKernel - * - * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL - * @param[in] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k (num_mtx_a_cols) Number of matrix A columns - * - is_reshaped (is_interleaved4x4) True if the matrix A has been interleaved4x4 - * - scalar Scalar value to multiply each reduced row by. - * - mul_byscalar True if each reduced column must be multiplied by a scalar value. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row, const GEMMLowpReductionKernelInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Execution of the reduction kernel specialized on the input type - * - * @param[in] window Execution window - */ - template - void run_internal(const Window &window); -}; - -/** NEON kernel used to compute the row-vectors of sums of all the entries in each column of Matrix B. - * - * @note This stage is needed to handle the offset of matrix product - * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md - */ -class NEGEMMLowpMatrixBReductionKernel : public INEGEMMLowpReductionKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpMatrixBReductionKernel"; - } - /** Initialise the kernel's input and output. - * - * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL - * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k (num_mtx_b_rows) Number of matrix B rows. - * - is_reshaped (is_transposed1xW) True if the input tensor is transposed 1xW. - * - scalar Scalar value to multiply each reduced row by. - * - mul_byscalar True if each reduced row must be multiplied by a scalar value. - */ - void configure(const ITensor *mtx_b, ITensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override; - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixBReductionKernel - * - * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL - * @param[in] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. 
Data type supported: S32 - * @param[in] info Kernel metadata: - * - k (num_mtx_b_rows) Number of matrix B rows. - * - is_reshaped (is_transposed1xW) True if the input tensor is transposed 1xW. - * - scalar Scalar value to multiply each reduced row by. - * - mul_byscalar True if each reduced row must be multiplied by a scalar value. - * - * @return a status - */ - static Status validate(const ITensorInfo *mtx_b, const ITensorInfo *vector_sum_col, const GEMMLowpReductionKernelInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Execution of the reduction kernel specialized on the input type - * - * @param[in] window Execution window - * @param[in] info Thread-related information - */ - template - void run_internal(const Window &window, const ThreadInfo &info); -}; -} // namespace arm_compute - -#endif /* ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h deleted file mode 100644 index 79f62561da..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H -#define ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform the in-place matrix addition between 2 matrices taking into account that the second matrix might be weighted by a scalar value beta: - * - * @note [ MTX_OUT = MTX_0 + beta * MTX_1 ] with MTX_0 and MTX_1 of the same size - * - * @note This stage is used to finalize the GEMM result and it is computed if and only if beta != 0.0. 
In case this kernel is used for finalizing GEMM result, we have: - * - MTX_0 = A * B * alpha, where MTX_0 is the output of @ref NEGEMMMatrixMultiplyKernel - * - MTX_1 = C - */ -class NEGEMMMatrixAdditionKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGEMMMatrixAdditionKernel"; - } - /** Constructor */ - NEGEMMMatrixAdditionKernel(); - /** Prevent instances of this class from being copied */ - NEGEMMMatrixAdditionKernel(const NEGEMMMatrixAdditionKernel &) = delete; - /** Prevent instances of this class from being copied */ - NEGEMMMatrixAdditionKernel &operator=(const NEGEMMMatrixAdditionKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMMatrixAdditionKernel(NEGEMMMatrixAdditionKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMMatrixAdditionKernel &operator=(NEGEMMMatrixAdditionKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @note The input and output tensor must have the same dimensions - * - * @param[in] input Input tensor (Matrix C). Data types supported: F16/F32 - * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input. - * @param[in] beta Weight of matrix C - */ - void configure(const ITensor *input, ITensor *output, float beta); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixAdditionKernel. - * - * @note The input and output tensor must have the same dimensions - * - * @param[in] input Input tensor info (Matrix C). Data types supported: F16/F32 - * @param[in] output Output tensor info. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input. 
- * @param[in] beta Weight of matrix C - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the matrix addition functions - * - * @param[in] input An input tensor. Data types supported: F16/F32 - * @param[out] output The output tensor. Data type supported: same as @p input - * @param[in] window Region on which to execute the kernel. - * @param[in] beta Weight of matrix C - */ - using MatrixAdditionFunction = void(const ITensor *input, ITensor *output, const Window &window, float beta); - /** Matrix addition function to use for the particular tensor types passed to configure() */ - MatrixAdditionFunction *_func; - float _beta; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h deleted file mode 100644 index f79e07ebb4..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H -#define ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to multiply two input matrices "A" and "B". All elements of the output matrix/vector will be multiplied by alpha after the matrix multiplication - * - * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref NEGEMMInterleave4x4Kernel" and @ref NEGEMMTranspose1xWKernel - * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. 
The implementation also assumes that both tensors have not been reshaped - * - */ -class NEGEMMMatrixMultiplyKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMMatrixMultiplyKernel"; - } - /** Constructor */ - NEGEMMMatrixMultiplyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMMatrixMultiplyKernel(const NEGEMMMatrixMultiplyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMMatrixMultiplyKernel &operator=(const NEGEMMMatrixMultiplyKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMMatrixMultiplyKernel(NEGEMMMatrixMultiplyKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMMatrixMultiplyKernel &operator=(NEGEMMMatrixMultiplyKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @note If the output tensor is a matrix, the input matrices @p input0 and @p input1 should be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel - * These two kernels change the layout of the original matrices to be more cache-friendly. - * - * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32 - * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. - * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. - * @param[in] alpha Weight of the matrix product - * @param[in] is_interleaved (Optional) True if input0 and input1 have been reshaped respectively using @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel - * @param[in] reshape_info (Optional) GEMM reshape info. 
If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped - */ - void configure(const ITensor *input0, const ITensor *input1, ITensor *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixMultiplyKernel - * - * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32 - * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. - * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 - * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. - * @param[in] alpha Weight of the matrix product - * @param[in] is_interleaved (Optional) True if input0 and input1 have been reshaped respectively using @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel - * @param[in] reshape_info (Optional) GEMM reshape info. 
If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input0; - const ITensor *_input1; - ITensor *_output; - float _alpha; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H*/ diff --git a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h deleted file mode 100644 index 756ac6a852..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H -#define ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** NEON kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor) - * - * Following an example of how the transposition1xW works when the input data is F32 - * - * @f[ - * \left( \begin{array}{cccc} - * a00 & a01 & a02 & a03 \\ - * a10 & a11 & a12 & a13 \\ - * a20 & a21 & a22 & a23 \\ - * a30 & a31 & a32 & a33 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccccccccccccccccc} - * a00 & a01 & a02 & a03 & a10 & a11 & a12 & a13 & a20 & a21 & a22 & a23 & a30 & a31 & a32 & a33 \\ - * \end{array} \right) - * @f] - * - * Following an example of how the transposition1xW works when the input data type is F16 - * - * @f[ - * \left( \begin{array}{cccccccc} - * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 \\ - * a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 \\ - * a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 \\ - * a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc} - * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 & a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 & a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 & a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37\\ - * \end{array} \right) - * @f] - * - * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor) - * - */ -class NEGEMMTranspose1xWKernel : 
public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGEMMTranspose1xWKernel"; - } - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. Data type supported: same as @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMTranspose1xWKernel - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] output Output tensor info. Data type supported: same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGatherKernel.h b/arm_compute/core/NEON/kernels/NEGatherKernel.h deleted file mode 100644 index 31d4f19ed0..0000000000 --- a/arm_compute/core/NEON/kernels/NEGatherKernel.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_NEGATHERKERNEL_H -#define ARM_COMPUTE_NEGATHERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Kernel to perform other operation on NEON */ -class NEGatherKernel : public INEKernel -{ -public: - /** Default constructor. */ - NEGatherKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NEGatherKernel(const NEGatherKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NEGatherKernel &operator=(const NEGatherKernel &) = delete; - /** Allow instances of this class to be moved. */ - NEGatherKernel(NEGatherKernel &&) = default; - /** Allow instances of this class to be moved. */ - NEGatherKernel &operator=(NEGatherKernel &&) = default; - /** Default detructor */ - ~NEGatherKernel() = default; - - /** Name of the kernel - * - * @return Kernel name - */ - const char *name() const override - { - return "NEGatherKernel"; - } - /** Initialise the kernel's inputs and outputs - * - * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All - * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following type: U32/S32. Each value Must be in range [0, input.shape[@p axis]) - * @param[out] output Destination tensor. 
Data type supported: Same as @p input - * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 - */ - void configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis = 0); - /** Static function to check if given info will lead to a valid configuration of @ref NEGatherKernel - * - * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All - * @param[in] indices Indices tensor info. Supported tensor rank: up to 1. Must be one of the following type: U32/S32. Each value Must be in range [0, input.shape[@p axis]) - * @param[in] output Destination tensor info. Data type supported: Same as @p input - * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Implementation of the gather operation for 0 axis. - * - * For gather on the 0 axis an element by element copy is performed. - * - * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window()) - * @param[in] info Info about executing thread and CPU. - */ - template - void gather_0_axis(const Window &window, const ThreadInfo &info); - - /** Implementation of the gather operation. - * - * For 1<=axis a row-wise copy is taking place. - * - * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window()) - * @param[in] info Info about executing thread and CPU. 
- */ - template - void gather_n_axis(const Window &window, const ThreadInfo &info); - - using kernel_ptr = void (NEGatherKernel::*)(const Window &window, const ThreadInfo &info); - - const ITensor *_input; - const ITensor *_indices; - int _axis; - ITensor *_output; - kernel_ptr _func; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEGATHERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h b/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h deleted file mode 100644 index c8141817db..0000000000 --- a/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H -#define ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a Gaussian 3x3 filter */ -class NEGaussian3x3Kernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGaussian3x3Kernel"; - } - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h b/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h deleted file mode 100644 index b489f4b458..0000000000 --- a/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H -#define ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a Gaussian 5x5 filter (horizontal pass) */ -class NEGaussian5x5HorKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGaussian5x5HorKernel"; - } - /** Default constructor */ - NEGaussian5x5HorKernel(); - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - BorderSize _border_size; -}; - -/** NEON kernel to perform a Gaussian 5x5 filter (vertical pass) */ -class NEGaussian5x5VertKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGaussian5x5VertKernel"; - } - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data type supported: S16. - * @param[out] output Destination tensor, Data type supported: U8. 
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h b/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h deleted file mode 100644 index 33a4452382..0000000000 --- a/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H -#define ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a GaussianPyramid (horizontal pass) */ -class NEGaussianPyramidHorKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGaussianPyramidHorKernel"; - } - /** Default constructor */ - NEGaussianPyramidHorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &&) = default; - /** Default destructor */ - ~NEGaussianPyramidHorKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Output should have half the input width. Data type supported: S16. 
- */ - void configure(const ITensor *input, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - int _l2_load_offset; -}; - -/** NEON kernel to perform a GaussianPyramid (vertical pass) */ -class NEGaussianPyramidVertKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGaussianPyramidVertKernel"; - } - /** Default constructor */ - NEGaussianPyramidVertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &&) = default; - /** Default destructor */ - ~NEGaussianPyramidVertKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data type supported: S16. - * @param[out] output Destination tensor. Output should have half the input height. Data type supported: U8. 
- */ - void configure(const ITensor *input, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - int _t2_load_offset; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h b/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h deleted file mode 100644 index 7b82488c44..0000000000 --- a/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H -#define ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -namespace arm_compute -{ -class ITensor; - -/** Interface for Compute All Anchors kernel */ -class NEComputeAllAnchorsKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEComputeAllAnchorsKernel"; - } - - /** Default constructor */ - NEComputeAllAnchorsKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEComputeAllAnchorsKernel(const NEComputeAllAnchorsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEComputeAllAnchorsKernel &operator=(const NEComputeAllAnchorsKernel &) = delete; - /** Allow instances of this class to be moved */ - NEComputeAllAnchorsKernel(NEComputeAllAnchorsKernel &&) = default; - /** Allow instances of this class to be moved */ - NEComputeAllAnchorsKernel &operator=(NEComputeAllAnchorsKernel &&) = default; - /** Default destructor */ - ~NEComputeAllAnchorsKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 - * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input - * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo - * - */ - void configure(const ITensor *anchors, ITensor *all_anchors, const ComputeAnchorsInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref NEComputeAllAnchorsKernel - * - * @param[in] anchors Source tensor info. Original set of anchors of size (4, A), where A is the number of anchors. 
Data types supported: QSYMM16/F16/F32 - * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input - * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo - * - * @return a Status - */ - static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - template - void internal_run(const Window &window); - - const ITensor *_anchors; - ITensor *_all_anchors; - ComputeAnchorsInfo _anchors_info; -}; -} // arm_compute -#endif // ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H diff --git a/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h b/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h deleted file mode 100644 index b0206ec091..0000000000 --- a/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H -#define ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H - -#include "arm_compute/core/IHOG.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Size2D.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform HOG Orientation Binning */ -class NEHOGOrientationBinningKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEHOGOrientationBinningKernel"; - } - /** Default constructor */ - NEHOGOrientationBinningKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGOrientationBinningKernel(const NEHOGOrientationBinningKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGOrientationBinningKernel &operator=(const NEHOGOrientationBinningKernel &) = delete; - /** Allow instances of this class to be moved */ - NEHOGOrientationBinningKernel(NEHOGOrientationBinningKernel &&) = default; - /** Allow instances of this class to be moved */ - NEHOGOrientationBinningKernel &operator=(NEHOGOrientationBinningKernel &&) = default; - /** Default destructor */ - ~NEHOGOrientationBinningKernel() = default; - - /** Initialise the kernel's inputs, output and HOG's metadata - * - * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16. 
- * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8 - * @param[out] output Output tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell - * @param[in] hog_info HOG's metadata - */ - void configure(const ITensor *input_magnitude, const ITensor *input_phase, ITensor *output, const HOGInfo *hog_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised block normalization functions - * - * @param[in] mag_row_ptr Pointer to the first row of the cell in the magnitude tensor - * @param[in] phase_row_ptr Pointer to the first row of the cell in the phase tensor - * @param[out] output_ptr Pointer to the output cell of hog space tensor - * @param[in] mag_stride Stride of the magnitude tensor - * @param[in] phase_stride Stride of the phase tensor - * @param[in] cell_width Width of the cell - * @param[in] cell_height Height of the cell - * @param[in] num_bins Number of bins for each cell - * @param[in] phase_scale Scale factor to apply to the phase in order to calculate the histogram index - */ - using OrientBinFunc = void(const int16_t *__restrict mag_row_ptr, const uint8_t *__restrict phase_row_ptr, float *__restrict output_ptr, size_t mag_stride, size_t phase_stride, size_t cell_width, - size_t cell_height, size_t num_bins, float phase_scale); - /** Orientation binning function to use for the particular cell width passed to configure() */ - OrientBinFunc *_func; - const ITensor *_input_magnitude; - const ITensor *_input_phase; - ITensor *_output; - size_t _cell_width; - size_t _cell_height; - size_t _num_bins; - float _phase_scale; -}; - -/** NEON kernel to perform HOG block normalization */ -class NEHOGBlockNormalizationKernel : public INEKernel -{ -public: - const char *name() const override - { - 
return "NEHOGBlockNormalizationKernel"; - } - /** Default constructor */ - NEHOGBlockNormalizationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGBlockNormalizationKernel(const NEHOGBlockNormalizationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGBlockNormalizationKernel &operator=(const NEHOGBlockNormalizationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEHOGBlockNormalizationKernel(NEHOGBlockNormalizationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEHOGBlockNormalizationKernel &operator=(NEHOGBlockNormalizationKernel &&) = default; - /** Default destructor */ - ~NEHOGBlockNormalizationKernel() = default; - - /** Initialise the kernel's input, output and HOG's metadata - * - * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell - * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. 
Number of channels supported: equal to the number of histogram bins per block - * @param[in] hog_info HOG's metadata - */ - void configure(const ITensor *input, ITensor *output, const HOGInfo *hog_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised block normalization functions - * - * @param[in] input_row_ptr Pointer to the first row of the block in the input hog space tensor - * @param[out] output_ptr Pointer to the output block of the hog normalized space - * @param[in] input_stride Stride of the input hog space tensor - * @param[in] num_cells_per_block_height Number of cells per block along the Y direction - * @param[in] num_bins_block_x Number of bins per block along the X direction - * @param[in] num_bins_block Number of total bins per block - * @param[in] l2_hyst_threshold Threshold to use for l2 hysteresis normalization - */ - using BlockNormFunc = void(const float *input_row_ptr, float *output_ptr, size_t input_stride, size_t num_cells_per_block_height, size_t num_bins_block_x, size_t num_bins_block, - float l2_hyst_threshold); - /** Block normalization function to use for the particular normalization type passed to configure() */ - BlockNormFunc *_func; - const ITensor *_input; - ITensor *_output; - Size2D _num_cells_per_block; - Size2D _num_cells_per_block_stride; - size_t _num_bins; - float _l2_hyst_threshold; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h b/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h deleted file mode 100644 index 2c23a2b11d..0000000000 --- a/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEHOGDETECTORKERNEL_H -#define ARM_COMPUTE_NEHOGDETECTORKERNEL_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/IHOG.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "support/Mutex.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform HOG detector kernel using linear SVM */ -class NEHOGDetectorKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEHOGDetectorKernel"; - } - /** Default constructor */ - NEHOGDetectorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGDetectorKernel(const NEHOGDetectorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGDetectorKernel &operator=(const NEHOGDetectorKernel &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHOGDetectorKernel(NEHOGDetectorKernel &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHOGDetectorKernel &operator=(NEHOGDetectorKernel &&) = delete; - /** Default destructor */ - ~NEHOGDetectorKernel() = default; - - /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect - * - * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref NEHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block - * @param[in] hog HOG data object used by @ref NEHOGOrientationBinningKernel and @ref NEHOGBlockNormalizationKernel - * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects - * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. 
- * It must be multiple of the hog->info()->block_stride() - * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane - * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to - */ - void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, uint16_t idx_class = 0); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - IDetectionWindowArray *_detection_windows; - const float *_hog_descriptor; - float _bias; - float _threshold; - uint16_t _idx_class; - size_t _num_bins_per_descriptor_x; - size_t _num_blocks_per_descriptor_y; - size_t _block_stride_width; - size_t _block_stride_height; - size_t _detection_window_width; - size_t _detection_window_height; - size_t _max_num_detection_windows; - arm_compute::Mutex _mutex; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEHOGDETECTORKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h deleted file mode 100644 index 084dd7deba..0000000000 --- a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEHARRISCORNERSKERNEL_H -#define ARM_COMPUTE_NEHARRISCORNERSKERNEL_H - -#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" -#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/INEKernel.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** Common interface for all Harris Score kernels */ -class INEHarrisScoreKernel : public INEKernel -{ -public: - /** Default constructor */ - INEHarrisScoreKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - INEHarrisScoreKernel(const INEHarrisScoreKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - INEHarrisScoreKernel &operator=(const INEHarrisScoreKernel &) = delete; - /** Allow instances of this class to be moved */ - INEHarrisScoreKernel(INEHarrisScoreKernel &&) = default; - /** Allow instances of this class to be moved */ - INEHarrisScoreKernel &operator=(INEHarrisScoreKernel &&) = default; - /** Default destructor */ - ~INEHarrisScoreKernel() = default; - -public: - /** Setup the kernel parameters - * - * @param[in] input1 Source image (gradient X). Data types supported: S16/S32 - * @param[in] input2 Source image (gradient Y). Data types supported: same as @ input1 - * @param[out] output Destination image (harris score). Data types supported: F32 - * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0) - * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). - * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */ - virtual void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) = 0; - -protected: - const IImage *_input1; /**< Source image - Gx component */ - const IImage *_input2; /**< Source image - Gy component */ - IImage *_output; /**< Source image - Harris score */ - float _sensitivity; /**< Sensitivity value */ - float _strength_thresh; /**< Threshold value */ - float _norm_factor; /**< Normalization factor */ - BorderSize _border_size; /**< Border size */ -}; - -/** Template NEON kernel to perform Harris Score. - * The implementation supports 3, 5, and 7 for the block_size - */ -template <int32_t block_size> -class NEHarrisScoreKernel : public INEHarrisScoreKernel -{ -public: - const char *name() const override - { - return "NEHarrisScoreKernel"; - } - /** Default constructor */ - NEHarrisScoreKernel(); - // Inherited methods overridden: - void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) override; - BorderSize border_size() const override; - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised harris score functions */ - using HarrisScoreFunction = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride, - float norm_factor, float sensitivity, float strength_thresh); - /** Harris Score function to use for the particular image types passed to configure() */ - HarrisScoreFunction *_func; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEHARRISCORNERSKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h deleted file mode 100644 index 8a5e86acc4..0000000000 --- a/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h +++
/dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H -#define ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the height concatenate kernel. - * The input tensor will be concatenated into the output tensor. 
- */ -class NEHeightConcatenateLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEHeightConcatenateLayerKernel"; - } - /** Default constructor */ - NEHeightConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHeightConcatenateLayerKernel(const NEHeightConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHeightConcatenateLayerKernel &operator=(const NEHeightConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEHeightConcatenateLayerKernel(NEHeightConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEHeightConcatenateLayerKernel &operator=(NEHeightConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~NEHeightConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] height_offset The starting offset on the Y axis for the output tensor. - * @param[in,out] output Output tensor info. Data types supported: Same as @p input. - * - */ - void configure(const ITensorInfo *input, unsigned int height_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEHeightConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] height_offset The starting offset on the Y axis for the output tensor. - * @param[in] output Output tensor info. Data types supported: Same as @p input. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; - -private: - unsigned int _height_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEHistogramKernel.h b/arm_compute/core/NEON/kernels/NEHistogramKernel.h deleted file mode 100644 index 6e5b92273b..0000000000 --- a/arm_compute/core/NEON/kernels/NEHistogramKernel.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEHISTOGRAMKERNEL_H -#define ARM_COMPUTE_NEHISTOGRAMKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "support/Mutex.h" - -#include <cstddef> -#include <cstdint> - -namespace arm_compute -{ -class IDistribution1D; -class ITensor; -using IImage = ITensor; - -/** Interface for the histogram kernel */ -class NEHistogramKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEHistogramKernel"; - } - /** Default constructor */ - NEHistogramKernel(); - /** Default destructor */ - ~NEHistogramKernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHistogramKernel(const NEHistogramKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHistogramKernel &operator=(const NEHistogramKernel &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHistogramKernel(NEHistogramKernel &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHistogramKernel &operator=(NEHistogramKernel &&) = delete; - - /** Set the input image and the distribution output. - * - * @param[in] input Source image. Data type supported: U8. - * @param[out] output Destination distribution. - * @param[in,out] local_hist Array that the threads use to save their local histograms. - * It's size should be equal to (number_of_threads * num_bins), - * and the Window::thread_id() is used to determine the part of the array - * used by each thread. - * @param[out] window_lut LUT with pre-calculated possible window values. - * The size of the LUT should be equal to max_range_size and it will be filled - * during the configure stage, while it re-used in every run, therefore can be - * safely shared among threads.
- */ - void configure(const IImage *input, IDistribution1D *output, uint32_t *local_hist, uint32_t *window_lut); - /** Set the input image and the distribution output. - * - * @note Used for histogram of fixed size equal to 256 - * - * @param[in] input Source image. Data type supported: U8. - * @param[out] output Destination distribution which must be of 256 bins.. - */ - void configure(const IImage *input, IDistribution1D *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Function to merge multiple partial histograms. - * - * @param[out] global_hist Pointer to the final histogram. - * @param[in] local_hist Pointer to the partial histograms. - * @param[in] bins Number of bins. - */ - void merge_histogram(uint32_t *global_hist, const uint32_t *local_hist, size_t bins); - /** Function to merge multiple minimum values of partial histograms. - * - * @param[out] global_min Pointer to the global min value. - * @param[in] local_min Local min value. - */ - void merge_min(uint8_t *global_min, const uint8_t &local_min); - /** Function to perform histogram on the given window - * - * @param[in] win Region on which to execute the kernel - * @param[in] info Info about the executing thread - */ - void histogram_U8(Window win, const ThreadInfo &info); - /** Function to perform histogram on the given window where histogram is - * of fixed size 256 without ranges and offsets. - * - * @param[in] win Region on which to execute the kernel - * @param[in] info Info about the executing thread - */ - void histogram_fixed_U8(Window win, const ThreadInfo &info); - /** Pre-calculate the pixel windowing for every possible pixel - * - * Calculate (V - offset) * numBins / range where V is every possible pixel value. 
- * - * @note We currently support U8 image thus possible pixel values are between 0 and 255 - */ - void calculate_window_lut() const; - /** Common signature for all the specialised Histogram functions - * - * @param[in] window Region on which to execute the kernel. - */ - using HistogramFunctionPtr = void (NEHistogramKernel::*)(Window window, const ThreadInfo &info); - - HistogramFunctionPtr _func; ///< Histogram function to use for the particular image types passed to configure() - const IImage *_input; - IDistribution1D *_output; - uint32_t *_local_hist; - uint32_t *_window_lut; - arm_compute::Mutex _hist_mtx; - static constexpr unsigned int _max_range_size{ 256 }; ///< 256 possible pixel values as we handle only U8 images -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEHISTOGRAMKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h deleted file mode 100644 index 95825ade18..0000000000 --- a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEIM2COLKERNEL_H -#define ARM_COMPUTE_NEIM2COLKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; -class Size2D; - -/** Interface for the im2col reshape kernel. - * - * Rearranges image blocks into columns. It is used to strip out each convolution block to a single column. - * It is used to transform a convolution to a plain matrix multiplication. - * - * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have: - * - * @f[ - * \left( \begin{array}{cccc} - * a00 & a01 & a02 & a03 \\ - * a10 & a11 & a12 & a13 \\ - * a20 & a21 & a22 & a23 \\ - * a30 & a31 & a32 & a33 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccccccccc} - * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\ - * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\ - * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\ - * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\ - * \end{array} \right) - * @f] - */ -class NEIm2ColKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEIm2ColKernel"; - } - /** Default constructor */ - NEIm2ColKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEIm2ColKernel(const NEIm2ColKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEIm2ColKernel &operator=(const NEIm2ColKernel &) = delete; - /** Allow instances of this class to be moved */ - NEIm2ColKernel(NEIm2ColKernel &&) = default; - /** Allow instances of this class to be moved */ - NEIm2ColKernel &operator=(NEIm2ColKernel &&) = default; 
- /** Default destructor */ - ~NEIm2ColKernel() = default; - - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32 - * Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false - * @param[out] output The output tensor. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported - */ - void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, - bool has_bias, const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1); - /** Static function to check if given info will lead to a valid configuration of @ref NEIm2ColKernel - * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32 - * Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false - * @param[in] output The output tensor. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. 
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, - bool has_bias, const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run im2col - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template <typename T, bool has_pads, bool is_nchw> - void run_im2col(const Window &window); - - /** Common signature for all the specialised im2col functions - * - * @param[in] window Region on which to execute the kernel. - */ - using Im2ColFunctionPtr = void (NEIm2ColKernel::*)(const Window &window); - - Im2ColFunctionPtr _func; - const ITensor *_input; - ITensor *_output; - std::pair<unsigned int, unsigned int> _convolved_dims; - PadStrideInfo _conv_info; - unsigned int _kernel_width; - unsigned int _kernel_height; - bool _has_bias; - Size2D _dilation; - DataLayout _data_layout; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEIM2COLKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h deleted file mode 100644 index a5bd453ac7..0000000000 --- a/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; -struct InstanceNormalizationLayerKernelInfo; - -/** Interface for performing an instance normalization */ -class NEInstanceNormalizationLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEInstanceNormalizationLayerKernel"; - } - /** Default constructor */ - NEInstanceNormalizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEInstanceNormalizationLayerKernel(const NEInstanceNormalizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEInstanceNormalizationLayerKernel &operator=(const NEInstanceNormalizationLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEInstanceNormalizationLayerKernel(NEInstanceNormalizationLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEInstanceNormalizationLayerKernel &operator=(NEInstanceNormalizationLayerKernel &&) = default; - /** Default destructor */ - ~NEInstanceNormalizationLayerKernel() = default; - /** Set the input and output tensors. - * - * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW - * In case of @p output tensor = nullptr this tensor will store the result of the normalization. - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. - * @param[in] info Kernel meta-data descriptor - */ - void configure(ITensor *input, ITensor *output, const InstanceNormalizationLayerKernelInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref NEInstanceNormalizationLayer. - * - * @param[in] input Source tensor info. Data types supported: F16/F32. 
Data layout supported: NCHW - * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. - * @param[in] info Kernel meta-data descriptor - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialized instance normalization functions - * - * @param[in, out] input An input tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization. - * @param[out] output The output tensor. - * @param[in] gamma The scale scalar value applied to the normalized tensor. Defaults to 1.0 - * @param[in] beta The offset scalar value applied to the normalized tensor. Defaults to 0.0 - * @param[in] epsilon Lower bound value for the normalization. Defaults to 1e-12 - */ - using NormalizationFunction = void(ITensor *input, ITensor *output, float gamma, float beta, float epsilon, const Window &window); - - NormalizationFunction *_func; - ITensor *_input; - ITensor *_output; - float _gamma; - float _beta; - float _epsilon; - bool _use_mixed_precision{ true }; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h b/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h deleted file mode 100644 index 57f24befdb..0000000000 --- a/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H -#define ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Kernel to perform an image integral on an image */ -class NEIntegralImageKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEIntegralImageKernel"; - } - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. 
Data type supported: U32 - */ - void configure(const ITensor *input, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - bool is_parallelisable() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h b/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h deleted file mode 100644 index 302d04e9f3..0000000000 --- a/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H -#define ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for performing a L2 normalize on a given axis given the square sum of it in this axis */ -class NEL2NormalizeLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEL2NormalizeLayerKernel"; - } - /** Default constructor */ - NEL2NormalizeLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEL2NormalizeLayerKernel(const NEL2NormalizeLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEL2NormalizeLayerKernel &operator=(const NEL2NormalizeLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEL2NormalizeLayerKernel(NEL2NormalizeLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEL2NormalizeLayerKernel &operator=(NEL2NormalizeLayerKernel &&) = default; - /** Default destructor */ - ~NEL2NormalizeLayerKernel() = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: F16/F32. - * @param[in] sum Sum values tensor. Data types supported: same as @p input. - * Sum will have the same number of dimensions as input. - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 - * @param[in] epsilon Lower bound value for the normalization. - */ - void configure(const ITensor *input, const ITensor *sum, ITensor *output, int axis, float epsilon); - - /** Static function to check if given info will lead to a valid configuration of @ref NEL2NormalizeLayerKernel. 
- * - * @param[in] input Source tensor info. Data types supported: F16/F32. - * @param[in] sum Sum values tensor info. Data types supported: same as @p input. - * Sum will have the same number of dimensions as input. - * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 - * @param[in] epsilon Lower bound value for the normalization. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - const ITensor *_sum; - ITensor *_output; - unsigned int _actual_axis; - float _epsilon; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NELKTrackerKernel.h b/arm_compute/core/NEON/kernels/NELKTrackerKernel.h deleted file mode 100644 index 90e5f41f8a..0000000000 --- a/arm_compute/core/NEON/kernels/NELKTrackerKernel.h +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_LKTRACKERKERNEL_H -#define ARM_COMPUTE_LKTRACKERKERNEL_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -#include -#include -#include -#include - -namespace arm_compute -{ -class ITensor; - -/** Internal keypoint class for Lucas-Kanade Optical Flow */ -struct NELKInternalKeypoint -{ - float x{ 0.f }; /**< x coordinate of the keypoint */ - float y{ 0.f }; /**< y coordinate of the keypoint */ - bool tracking_status{ false }; /**< the tracking status of the keypoint */ -}; - -/** Interface for NEON Array of Internal Key Points. 
*/ -using INELKInternalKeypointArray = IArray; - -/** Interface for the Lucas-Kanade tracker kernel */ -class NELKTrackerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NELKTrackerKernel"; - } - /** Default constructor */ - NELKTrackerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELKTrackerKernel(const NELKTrackerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELKTrackerKernel &operator=(const NELKTrackerKernel &) = delete; - /** Allow instances of this class to be moved */ - NELKTrackerKernel(NELKTrackerKernel &&) = default; - /** Allow instances of this class to be moved */ - NELKTrackerKernel &operator=(NELKTrackerKernel &&) = default; - /** Default destructor */ - ~NELKTrackerKernel() = default; - - /** Initialise the kernel input and output - * - * @param[in] input_old Pointer to the input old tensor. Data type supported: U8 - * @param[in] input_new Pointer to the input new tensor. Data type supported. U8 - * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data type supported: S16 - * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. Data type supported: S16 - * @param[in] old_points Pointer to the IKeyPointArray storing old key points - * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points - * @param[out] new_points Pointer to the IKeyPointArray storing new key points - * @param[in, out] old_points_internal Pointer to the array of NELKInternalKeypoint for old points - * @param[out] new_points_internal Pointer to the array of NELKInternalKeypoint for new points - * @param[in] termination The criteria to terminate the search of each keypoint. 
- * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used - * @param[in] epsilon The error for terminating the algorithm - * @param[in] num_iterations The maximum number of iterations before terminate the algorithm - * @param[in] window_dimension The size of the window on which to perform the algorithm - * @param[in] level The pyramid level - * @param[in] num_levels The number of pyramid levels - * @param[in] pyramid_scale Scale factor used for generating the pyramid - */ - void configure(const ITensor *input_old, const ITensor *input_new, const ITensor *old_scharr_gx, const ITensor *old_scharr_gy, - const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates, IKeyPointArray *new_points, - INELKInternalKeypointArray *old_points_internal, INELKInternalKeypointArray *new_points_internal, - Termination termination, bool use_initial_estimate, float epsilon, unsigned int num_iterations, size_t window_dimension, - size_t level, size_t num_levels, float pyramid_scale); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Initialise the array of keypoints in the provide range - * - * @param[in] start Index of first element in the keypoints array to be initialised - * @param[in] end Index after last elelemnt in the keypoints array to be initialised - */ - void init_keypoints(int start, int end); - /** Compute the structure tensor A^T * A based on the scharr gradients I_x and I_y - * - * @param[in] keypoint Keypoint for which gradients are computed - * @param[out] bilinear_ix Intermediate interpolated data for X gradient - * @param[out] bilinear_iy Intermediate interpolated data for Y gradient - * - * @return Values A11, A12, A22 - */ - std::tuple compute_spatial_gradient_matrix(const NELKInternalKeypoint &keypoint, int32_t *bilinear_ix, int32_t *bilinear_iy); - /** Compute the vector A^T * b, 
i.e. -sum(I_d * I_t) for d in {x,y} - * - * @param[in] old_keypoint Old keypoint for which gradient is computed - * @param[in] new_keypoint New keypoint for which gradient is computed - * @param[in] bilinear_ix Intermediate interpolated data for X gradient - * @param[in] bilinear_iy Intermediate interpolated data for Y gradient - * - * @return Values b1, b2 - */ - std::pair compute_image_mismatch_vector(const NELKInternalKeypoint &old_keypoint, const NELKInternalKeypoint &new_keypoint, const int32_t *bilinear_ix, const int32_t *bilinear_iy); - - const ITensor *_input_old; - const ITensor *_input_new; - const ITensor *_old_scharr_gx; - const ITensor *_old_scharr_gy; - IKeyPointArray *_new_points; - const IKeyPointArray *_new_points_estimates; - const IKeyPointArray *_old_points; - INELKInternalKeypointArray *_old_points_internal; - INELKInternalKeypointArray *_new_points_internal; - Termination _termination; - bool _use_initial_estimate; - float _pyramid_scale; - float _epsilon; - unsigned int _num_iterations; - int _window_dimension; - unsigned int _level; - unsigned int _num_levels; - ValidRegion _valid_region; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NELKTRACKERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h deleted file mode 100644 index ba14598135..0000000000 --- a/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H -#define ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to multiply each row of first tensor with low 2 dimensions of second tensor. 
*/ -class NELocallyConnectedMatrixMultiplyKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NELocallyConnectedMatrixMultiplyKernel"; - } - /** Default constructor */ - NELocallyConnectedMatrixMultiplyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELocallyConnectedMatrixMultiplyKernel(const NELocallyConnectedMatrixMultiplyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELocallyConnectedMatrixMultiplyKernel &operator=(const NELocallyConnectedMatrixMultiplyKernel &) = delete; - /** Allow instances of this class to be moved */ - NELocallyConnectedMatrixMultiplyKernel(NELocallyConnectedMatrixMultiplyKernel &&) = default; - /** Allow instances of this class to be moved */ - NELocallyConnectedMatrixMultiplyKernel &operator=(NELocallyConnectedMatrixMultiplyKernel &&) = default; - /** Initialise the kernel's input and output - * - * @param[in] input0 First input tensor. Data types supported: F16, F32 - * @param[in] input1 Second input tensor containing the Matrix B. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 - */ - void configure(const ITensor *input0, const ITensor *input1, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NELocallyConnectedMatrixMultiplyKernel - * - * @param[in] input0 First input tensor info. Data types supported: F16, F32 - * @param[in] input1 Second input tensor info. Data type supported: same as @p input0 - * @param[in] output Output tensor info. 
Data type supported: same as @p input0 - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input0; - const ITensor *_input1; - ITensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h deleted file mode 100644 index ea42a38994..0000000000 --- a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H -#define ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Template interface for the kernel to compute magnitude and phase */ -template -class NEMagnitudePhaseKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMagnitudePhaseKernel"; - } - /** Default constructor */ - NEMagnitudePhaseKernel(); - /** Destructor */ - ~NEMagnitudePhaseKernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMagnitudePhaseKernel(const NEMagnitudePhaseKernel &) = delete; - /** Default move constructor */ - NEMagnitudePhaseKernel(NEMagnitudePhaseKernel &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMagnitudePhaseKernel &operator=(const NEMagnitudePhaseKernel &) = delete; - /** Default move assignment operator */ - NEMagnitudePhaseKernel &operator=(NEMagnitudePhaseKernel &&) = default; - - /** Initialise the kernel's input, output. - * - * @note At least one of out1 or out2 must be set - * - * @param[in] gx Gradient X tensor. Data type supported: S16. - * @param[in] gy Gradient Y tensor. Data type supported: S16. - * @param[out] magnitude (Optional) The output tensor - Magnitude. Data type supported: S16. - * @param[out] phase (Optional) The output tensor - Phase. Data type supported: U8. 
- */ - void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Function to perform magnitude on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void magnitude(const Window &window); - /** Function to perform phase on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void phase(const Window &window); - /** Function to perform magnitude and phase on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void magnitude_phase(const Window &window); - -private: - /** Common signature for all the specialised MagnitudePhase functions - * - * @param[in] window Region on which to execute the kernel. - */ - using MagnitudePhaseFunctionPtr = void (NEMagnitudePhaseKernel::*)(const Window &window); - /** MagnitudePhase function to use for the particular formats passed to configure() */ - MagnitudePhaseFunctionPtr _func; - const ITensor *_gx; /**< Input gradient X */ - const ITensor *_gy; /**< Input gradient Y */ - ITensor *_magnitude; /**< Output - Magnitude */ - ITensor *_phase; /**< Output - Phase */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h deleted file mode 100644 index f3ea049a87..0000000000 --- a/arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEMAXUNPOOLINGLAYERKERNEL_H -#define ARM_COMPUTE_NEMAXUNPOOLINGLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the pooling layer kernel */ -class NEMaxUnpoolingLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMaxUnpoolingLayerKernel"; - } - /** Default constructor */ - NEMaxUnpoolingLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMaxUnpoolingLayerKernel(const NEMaxUnpoolingLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMaxUnpoolingLayerKernel &operator=(const NEMaxUnpoolingLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEMaxUnpoolingLayerKernel(NEMaxUnpoolingLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEMaxUnpoolingLayerKernel &operator=(NEMaxUnpoolingLayerKernel &&) = default; - /** Default destructor */ - ~NEMaxUnpoolingLayerKernel() = default; - /** Set the input and output tensors. - * - * @note Output shape must be equal to the shape of the original input to pool. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] indices Tensor containing the offset to store the input elements in the output tensor. - * @ref NEPoolingLayerKernel with indices should precede this function in order to - * properly reconstruct the output tensor. - * The tensor shape of this tensor has to be equal to the input tensor shape. Data type supported: U32. - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. 
- */ - void configure(const ITensor *input, const ITensor *indices, ITensor *output, const PoolingLayerInfo &pool_info); - /** Static function to check if given info will lead to a valid configuration of @ref NEMaxUnpoolingLayerKernel - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] output Destination tensor info. Data types supported: Same as @p input. - * @param[in] indices Tensor info of the indices of the maximal values. Data type supported: U32. - * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Function to perform 2x2 pooling and compute the pooling indices. The indices can be used for max unpool. - * - * @param[in] window_input Input region on which to execute the kernel. - */ - template - void unpooling2(const Window &window_input); - - using UnpoolingFunction = void (NEMaxUnpoolingLayerKernel::*)(const Window &window); - -private: - UnpoolingFunction _func; - const ITensor *_input; - ITensor *_output; - const ITensor *_indices; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEMAXUNPOOLINGLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h b/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h deleted file mode 100644 index eef0e2b586..0000000000 --- a/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMEANSTDDEVKERNEL_H -#define ARM_COMPUTE_NEMEANSTDDEVKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "support/Mutex.h" - -#include - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** Interface for the kernel to calculate mean and standard deviation of input image pixels. 
*/ -class NEMeanStdDevKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMeanStdDevKernel"; - } - /** Default constructor */ - NEMeanStdDevKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMeanStdDevKernel(const NEMeanStdDevKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMeanStdDevKernel &operator=(const NEMeanStdDevKernel &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMeanStdDevKernel(NEMeanStdDevKernel &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMeanStdDevKernel &operator=(NEMeanStdDevKernel &&) = delete; - /** Default destructor */ - ~NEMeanStdDevKernel() = default; - - /** Initialise the kernel's input and outputs. - * - * @param[in] input Input image. Data type supported: U8. - * @param[out] mean Input average pixel value. - * @param[out] global_sum Keeps global sum of pixel values. - * @param[out] stddev (Optional) Output standard deviation of pixel values. - * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values. 
- */ - void configure(const IImage *input, float *mean, uint64_t *global_sum, float *stddev = nullptr, uint64_t *global_sum_squared = nullptr); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - - BorderSize border_size() const override; - -private: - const IImage *_input; - float *_mean; - float *_stddev; - uint64_t *_global_sum; - uint64_t *_global_sum_squared; - arm_compute::Mutex _mtx; - BorderSize _border_size; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEMEANSTDDEVKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h b/arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h deleted file mode 100644 index 66b907541e..0000000000 --- a/arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H -#define ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -#include -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to normalize the input 2D tensor across the first dimension with respect to mean and standard deviation of the same dimension. */ -class NEMeanStdDevNormalizationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMeanStdDevNormalizationKernel"; - } - /** Default constructor */ - NEMeanStdDevNormalizationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMeanStdDevNormalizationKernel(const NEMeanStdDevNormalizationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMeanStdDevNormalizationKernel &operator=(const NEMeanStdDevNormalizationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEMeanStdDevNormalizationKernel(NEMeanStdDevNormalizationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEMeanStdDevNormalizationKernel &operator=(NEMeanStdDevNormalizationKernel &&) = default; - /** Default destructor */ - ~NEMeanStdDevNormalizationKernel() = default; - /** Initialise the kernel's input and outputs. - * - * @note If the output tensor is a nullptr, the normalization will be performed in-place. - * - * @param[in, out] input Source tensor with 2 dimensions. In case of @p output tensor = nullptr, - * this tensor will store the result of the normalization. 
Data types supported: F16/F32. - * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input - * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. - */ - void configure(ITensor *input, ITensor *output = nullptr, float epsilon = 1e-8f); - /** Static function to check if given info will lead to a valid configuration of @ref NEMeanStdDevNormalizationKernel - * - * @param[in] input Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr, - * this tensor will store the result of the normalization. Data types supported: F16/F32. - * @param[in] output (Optional) Destination tensor info. It can be nullptr in case of in-place computation. Data type supported: same as @p input - * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output = nullptr, float epsilon = 1e-8f); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Normalizes the input with respect to mean and standard deviation. - * - * @param[in] window Region on which to execute the kernel. 
- */ - template - void mean_stddev_normalization(const Window &window); - - ITensor *_input; - ITensor *_output; - float _epsilon; - - using MeanStdDevNormFunction = void (NEMeanStdDevNormalizationKernel::*)(const Window &window); - - MeanStdDevNormFunction _func; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h b/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h deleted file mode 100644 index f2871e2ab5..0000000000 --- a/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEMEDIAN3x3KERNEL_H -#define ARM_COMPUTE_NEMEDIAN3x3KERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Kernel to perform a median filter on a tensor */ -class NEMedian3x3Kernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEMedian3x3Kernel"; - } - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEMEDIAN3x3KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMemsetKernel.h b/arm_compute/core/NEON/kernels/NEMemsetKernel.h deleted file mode 100644 index f9a1914360..0000000000 --- a/arm_compute/core/NEON/kernels/NEMemsetKernel.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMEMSETKERNEL_H -#define ARM_COMPUTE_NEMEMSETKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for filling the planes of a tensor */ -class NEMemsetKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMemsetKernel"; - } - /** Default constructor */ - NEMemsetKernel(); - /** Default destructor */ - ~NEMemsetKernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMemsetKernel(const NEMemsetKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMemsetKernel &operator=(const NEMemsetKernel &) = delete; - /** Allow instances of this class to be moved */ - NEMemsetKernel(NEMemsetKernel &&) = default; - /** Allow instances of this class to be moved */ - NEMemsetKernel &operator=(NEMemsetKernel &&) = default; - /** Initialise the kernel's tensor and filling value - * - * @param[in,out] tensor Input tensor to fill. 
Supported data types: All - * @param[in] constant_value The value used to fill the planes of the tensor - */ - void configure(ITensor *tensor, const PixelValue &constant_value); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - ITensor *_tensor; - PixelValue _constant_value; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEMEMSETKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h b/arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h deleted file mode 100644 index e7e87e9339..0000000000 --- a/arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef ARM_COMPUTE_NEMINMAXLAYERKERNEL_H -#define ARM_COMPUTE_NEMINMAXLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "support/Mutex.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform min max search on a 3D tensor. */ -class NEMinMaxLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMinMaxLayerKernel"; - } - /** Default constructor */ - NEMinMaxLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxLayerKernel(const NEMinMaxLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxLayerKernel &operator=(const NEMinMaxLayerKernel &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMinMaxLayerKernel(NEMinMaxLayerKernel &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMinMaxLayerKernel &operator=(NEMinMaxLayerKernel &&) = delete; - /** Default destructor */ - ~NEMinMaxLayerKernel() = default; - - /** Initialise the kernel's input and outputs. - * - * @note output[0] = minimum - * @note output[1] = maximum - * - * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data type supported: F32. - * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum value for each 3D input tensor. - * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32 - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLMinMaxLayerKernel - * - * @param[in] input Input tensor info. Data types supported: F32. 
- * @param[in] output Output tensor info with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor. - * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - /** Resets global minimum and maximum. */ - void reset(); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - void update_min_max(float *out_ptr, float min, float max); - const ITensor *_input; - ITensor *_output; - arm_compute::Mutex _mtx; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEMINMAXLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h b/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h deleted file mode 100644 index 83f5afce72..0000000000 --- a/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H -#define ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "support/Mutex.h" - -#include - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** Interface for the kernel to perform min max search on an image. */ -class NEMinMaxKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMinMaxKernel"; - } - /** Default constructor */ - NEMinMaxKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxKernel(const NEMinMaxKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxKernel &operator=(const NEMinMaxKernel &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMinMaxKernel(NEMinMaxKernel &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMinMaxKernel &operator=(NEMinMaxKernel &&) = delete; - /** Default destructor */ - ~NEMinMaxKernel() = default; - - /** Initialise the kernel's input and outputs. - * - * @param[in] input Input Image. Data types supported: U8/S16/F32. - * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. - */ - void configure(const IImage *input, void *min, void *max); - /** Resets global minimum and maximum. 
*/ - void reset(); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Performs the min/max algorithm on U8 images on a given window. - * - * @param win The window to run the algorithm on. - */ - void minmax_U8(Window win); - /** Performs the min/max algorithm on S16 images on a given window. - * - * @param win The window to run the algorithm on. - */ - void minmax_S16(Window win); - /** Performs the min/max algorithm on F32 images on a given window. - * - * @param win The window to run the algorithm on. - */ - void minmax_F32(Window win); - /** Common signature for all the specialised MinMax functions - * - * @param[in] window Region on which to execute the kernel. - */ - using MinMaxFunction = void (NEMinMaxKernel::*)(Window window); - /** MinMax function to use for the particular image types passed to configure() */ - MinMaxFunction _func; - /** Helper to update min/max values **/ - template - void update_min_max(T min, T max); - - const IImage *_input; /**< Input image. */ - void *_min; /**< Minimum value. */ - void *_max; /**< Maximum value. */ - arm_compute::Mutex _mtx; /**< Mutex used for result reduction. */ -}; - -/** Interface for the kernel to find min max locations of an image. 
*/ -class NEMinMaxLocationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMinMaxLocationKernel"; - } - /** Default constructor */ - NEMinMaxLocationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxLocationKernel(const NEMinMaxLocationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxLocationKernel &operator=(const NEMinMaxLocationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEMinMaxLocationKernel(NEMinMaxLocationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEMinMaxLocationKernel &operator=(NEMinMaxLocationKernel &&) = default; - /** Default destructor */ - ~NEMinMaxLocationKernel() = default; - - /** Initialise the kernel's input and outputs. - * - * @param[in] input Input Image. Data types supported: U8/S16/F32. - * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] min_loc Array of minimum value locations. - * @param[out] max_loc Array of maximum value locations. - * @param[out] min_count Number of minimum value encounters. - * @param[out] max_count Number of maximum value encounters. - */ - void configure(const IImage *input, void *min, void *max, - ICoordinates2DArray *min_loc = nullptr, ICoordinates2DArray *max_loc = nullptr, - uint32_t *min_count = nullptr, uint32_t *max_count = nullptr); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - bool is_parallelisable() const override; - -private: - /** Performs the min/max location algorithm on T type images on a given window. - * - * @param win The window to run the algorithm on. 
- */ - template - void minmax_loc(const Window &win); - /** Common signature for all the specialised MinMaxLoc functions - * - * @param[in] window Region on which to execute the kernel. - */ - using MinMaxLocFunction = void (NEMinMaxLocationKernel::*)(const Window &window); - /** MinMaxLoc function to use for the particular image types passed to configure() */ - MinMaxLocFunction _func; - /** Helper to create a function pointer table for the parameterized MinMaxLocation functions. */ - template - struct create_func_table; - - const IImage *_input; /**< Input image. */ - void *_min; /**< Minimum value. */ - void *_max; /**< Maximum value. */ - uint32_t *_min_count; /**< Count of minimum value encounters. */ - uint32_t *_max_count; /**< Count of maximum value encounters. */ - ICoordinates2DArray *_min_loc; /**< Locations of minimum values. */ - ICoordinates2DArray *_max_loc; /**< Locations of maximum values. */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h b/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h deleted file mode 100644 index 5fc225c910..0000000000 --- a/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NENONLINEARFILTERKERNEL_H -#define ARM_COMPUTE_NENONLINEARFILTERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to apply a non-linear filter */ -class NENonLinearFilterKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NENonLinearFilterKernel"; - } - /** Default constructor */ - NENonLinearFilterKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NENonLinearFilterKernel(NENonLinearFilterKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &) = delete; - /** Allow instances of this class to be moved */ - NENonLinearFilterKernel(NENonLinearFilterKernel &&) = default; - /** Allow instances of this class to be moved */ - NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &&) = default; - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] function Non linear function to perform - * @param[in] mask_size Mask size. Supported sizes: 3, 5 - * @param[in] pattern Mask pattern - * @param[in] mask The given mask. 
Will be used only if pattern is specified to PATTERN_OTHER - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Fill mask with the corresponding given pattern. - * - * @param[in,out] mask Mask to be filled according to pattern - * @param[in] cols Columns (width) of mask - * @param[in] rows Rows (height) of mask - * @param[in] pattern Pattern to fill the mask according to - */ - void fill_mask(uint8_t *mask, int cols, int rows, MatrixPattern pattern); - /** Apply a median filter when given mask pattern is defined as box. - * - * @param[in] win Window to apply the filter on. - */ - template - void median_filter_box(const Window &win); - /** Apply a min filter when given mask pattern is defined as box. - * - * @param[in] win Window to apply the filter on. - */ - template - void min_filter_box(const Window &win); - /** Apply a max filter when given mask pattern is defined as box. - * - * @param[in] win Window to apply the filter on. - */ - template - void max_filter_box(const Window &win); - /** Apply a median filter when given mask pattern is defined as cross. - * - * @param[in] win Window to apply the filter on. - */ - template - void median_filter_cross(const Window &win); - /** Apply a min filter when given mask pattern is defined as cross. - * - * @param[in] win Window to apply the filter on. - */ - template - void min_filter_cross(const Window &win); - /** Apply a max filter when given mask pattern is defined as cross. - * - * @param[in] win Window to apply the filter on. 
- */ - template - void max_filter_cross(const Window &win); - /** Apply a median filter when given mask pattern is defined as disk. - * - * @param[in] win Window to apply the filter on. - */ - template - void median_filter_disk(const Window &win); - /** Apply a min filter when given mask pattern is defined as disk. - * - * @param[in] win Window to apply the filter on. - */ - template - void min_filter_disk(const Window &win); - /** Apply a max filter when given mask pattern is defined as disk. - * - * @param[in] win Window to apply the filter on. - */ - template - void max_filter_disk(const Window &win); - /** Apply a non-linear filter when given mask has user-defined pattern. - * - * @param[in] win Window to apply the filter on. - */ - template - void non_linear_filter_generic(const Window &win); - -private: - unsigned int _border_width; - const ITensor *_input; - ITensor *_output; - const uint8_t *_mask; - MatrixPattern _pattern; - NonLinearFilterFunction _function; - unsigned int _func_idx; - BorderSize _border_size; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NENONLINEARFILTERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h deleted file mode 100644 index bf5c520978..0000000000 --- a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H -#define ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Interface to perform Non-Maxima suppression over a 3x3 window using NEON - * - * @note Used by @ref NEFastCorners and @ref NEHarrisCorners - */ -class NENonMaximaSuppression3x3Kernel : public INEKernel -{ -public: - const char *name() const override - { - return "NENonMaximaSuppression3x3Kernel"; - } - /** Default constructor */ - NENonMaximaSuppression3x3Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NENonMaximaSuppression3x3Kernel(const NENonMaximaSuppression3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NENonMaximaSuppression3x3Kernel &operator=(const NENonMaximaSuppression3x3Kernel &) = delete; - /** Allow instances of this class to be moved */ - NENonMaximaSuppression3x3Kernel(NENonMaximaSuppression3x3Kernel &&) = default; - /** Allow instances of this class to be moved */ - NENonMaximaSuppression3x3Kernel &operator=(NENonMaximaSuppression3x3Kernel &&) = default; - /** Default destructor */ - ~NENonMaximaSuppression3x3Kernel() = default; - - /** Initialise the kernel's sources, destinations and border mode. - * - * @param[in] input Source tensor. Data types supported: U8/F32 - * @param[out] output Destination tensor. Data types supported: same as @p input - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -protected: - /** Common signature for all the specialised non-maxima suppression 3x3 functions - * - * @param[in] input_ptr Pointer to the input tensor. - * @param[out] output_ptr Pointer to the output tensor - * @param[in] input_stride Stride of the input tensor - */ - using NonMaxSuppr3x3Function = void(const void *__restrict input_ptr, void *__restrict output_ptr, const uint32_t input_stride); - - NonMaxSuppr3x3Function *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */ - const ITensor *_input; /**< Source tensor */ - ITensor *_output; /**< Destination tensor */ -}; - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32 - */ -class NENonMaximaSuppression3x3FP16Kernel : public NENonMaximaSuppression3x3Kernel -{ -public: - const char *name() const override - { - return "NENonMaximaSuppression3x3FP16Kernel"; - } - /** Initialise the kernel's sources, destinations and border mode. - * - * @param[in] input Source tensor. Data types supported: U8/F32. - * @param[out] output Destination tensor. Data types supported: same as @p input - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); -}; -#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32 */ -using NENonMaximaSuppression3x3FP16Kernel = NENonMaximaSuppression3x3Kernel; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -} // namespace arm_compute -#endif /* _ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h deleted file mode 100644 index 665b10244d..0000000000 --- a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the normalization layer kernel. - */ -class NENormalizationLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NENormalizationLayerKernel"; - } - /** Default constructor */ - NENormalizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NENormalizationLayerKernel(const NENormalizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NENormalizationLayerKernel &operator=(const NENormalizationLayerKernel &) = delete; - /** Default Move Constructor. */ - NENormalizationLayerKernel(NENormalizationLayerKernel &&) = default; - /** Default move assignment operator */ - NENormalizationLayerKernel &operator=(NENormalizationLayerKernel &&) = default; - /** Default destructor */ - ~NENormalizationLayerKernel() = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types supported: FP16/F32. Data layouts supported: NCHW/NHWC. - * @param[in] input_squared Source with each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM], - * Data type and layout supported: same as @p input. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type and layout supported: same as @p input. - * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. 
- */ - void configure(const ITensor *input, const ITensor *input_squared, ITensor *output, NormalizationLayerInfo norm_info); - /** Static function to check if given info will lead to a valid configuration of @ref NENormalizationLayerKernel - * - * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types supported: FP16/F32. Data layouts supported: NCHW/NHWC. - * @param[in] input_squared Source with each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM], - * Data type and layout supported: same as @p input. - * @param[in] output Destination tensor. Output will have the same number of dimensions as input. Data type and layout supported: same as @p input. - * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *input_squared, const ITensorInfo *output, NormalizationLayerInfo norm_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Function to perform normalization depending on the given template - * dimension. The second template parameter specifies whether the - * normalization has to be 1D or 2D. - * - * @note Only supported normalizations are: - * - 1D over X or Z - * - 2D over X and Y - * - * @param[in] window Region on which to execute the kernel. - */ - template - void normalize_float(const Window &window); - - /** Common signature for all the specialised normalization functions - * - * @param[in] window Region on which to execute the kernel. 
- */ - using NormalizationFunction = void (NENormalizationLayerKernel::*)(const Window &window); - -private: - NormalizationFunction _func; - const ITensor *_input; - const ITensor *_input_squared; - ITensor *_output; - NormalizationLayerInfo _norm_info; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEPadLayerKernel.h b/arm_compute/core/NEON/kernels/NEPadLayerKernel.h deleted file mode 100644 index 80daabb349..0000000000 --- a/arm_compute/core/NEON/kernels/NEPadLayerKernel.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEPADLAYERKERNEL_H -#define ARM_COMPUTE_NEPADLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to add padding to a tensor - * - * Add padding given padding information - */ -class NEPadLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEPadLayerKernel"; - } - /** Default constructor */ - NEPadLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPadLayerKernel(const NEPadLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPadLayerKernel &operator=(const NEPadLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEPadLayerKernel(NEPadLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEPadLayerKernel &operator=(NEPadLayerKernel &&) = default; - /** Default destructor */ - ~NEPadLayerKernel() = default; - - /** Initialize the function - * - * @param[in] input Source tensor. Data types supported: All. - * @param[out] output Output tensor. Data type supported: same as @p input - * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] - * specifies the front and the end padding in the i-th dimension. - * @param[in] constant_value (Optional) Constant value to be used for the padding - * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT. - * Only CONSTANT padding mode is currently supported - */ - void configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT); - /** Static function to check if given info will lead to a valid configuration of @ref NEPadLayer. - * - * @param[in] input Source tensor info. Data types supported: All. 
- * @param[in] output Output tensor info. Data type supported: same as @p input - * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] - * specifies the front and the end padding in the i-th dimension. - * @param[in] constant_value (Optional) Constant value to be used for the padding - * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT. - * Only CONSTANT padding mode is currently supported - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the padding function with constant padding - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void run_pad_constant(const Window &window); - - /** Function to run the padding function with constant padding for 3D input and 1D, 2D, 3D padding - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - void run_pad_constant_uint8_3Dinput_3Dpad(const Window &window); - - /** Common signature for all the specialised permute functions - * - * @param[in] window Region on which to execute the kernel. 
- */ - using PadFunctionPtr = void (NEPadLayerKernel::*)(const Window &window); - - PadFunctionPtr _func; - const ITensor *_input; - ITensor *_output; - PaddingList _padding; - PixelValue _constant_value; - PaddingMode _mode; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEPADLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEPermuteKernel.h b/arm_compute/core/NEON/kernels/NEPermuteKernel.h deleted file mode 100644 index 2f8af9373d..0000000000 --- a/arm_compute/core/NEON/kernels/NEPermuteKernel.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEPERMUTEKERNEL_H -#define ARM_COMPUTE_NEPERMUTEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** NEON kernel to perform tensor permutation. 
- * - * Permutes given a permutation vector - */ -class NEPermuteKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEPermuteKernel"; - } - /** Default constructor */ - NEPermuteKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPermuteKernel(const NEPermuteKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPermuteKernel &operator=(const NEPermuteKernel &) = delete; - /** Allow instances of this class to be moved */ - NEPermuteKernel(NEPermuteKernel &&) = default; - /** Allow instances of this class to be moved */ - NEPermuteKernel &operator=(NEPermuteKernel &&) = default; - /** Default destructor */ - ~NEPermuteKernel() = default; - - /** Set the input and output of the kernel. - * - * @note Arbitrary permutation vectors are supported with rank not greater than 4 - * - * @param[in] input The input tensor to permute. Data types supported: All - * @param[out] output The output tensor. Data types supported: Same as @p input - * @param[in] perm Permutation vector - */ - void configure(const ITensor *input, ITensor *output, const PermutationVector &perm); - /** Static function to check if given info will lead to a valid configuration of @ref CPPPermuteKernel - * - * @note Arbitrary permutation vectors are supported with rank not greater than 4 - * - * @param[in] input The input tensor to permute. Data types supported: All - * @param[in] output The output tensor. Data types supported: Same as @p input - * @param[in] perm Permutation vector - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the permute - * - * @param[in] window Region on which to execute the kernel. 
(Must be a valid region of the window returned by window()). - */ - template - void run_permute(const Window &window); - - /** Common signature for all the specialised permute functions - * - * @param[in] window Region on which to execute the kernel. - */ - using PermuteFunctionPtr = void (NEPermuteKernel::*)(const Window &window); - - PermuteFunctionPtr _func; - const ITensor *_input; - ITensor *_output; - PermutationVector _perm; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEPERMUTEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h deleted file mode 100644 index 6221d61f49..0000000000 --- a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H -#define ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform addition between two tensors */ -class NEPixelWiseMultiplicationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEPixelWiseMultiplicationKernel"; - } - /** Default constructor */ - NEPixelWiseMultiplicationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPixelWiseMultiplicationKernel(const NEPixelWiseMultiplicationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPixelWiseMultiplicationKernel &operator=(const NEPixelWiseMultiplicationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEPixelWiseMultiplicationKernel(NEPixelWiseMultiplicationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEPixelWiseMultiplicationKernel &operator=(NEPixelWiseMultiplicationKernel &&) = default; - /** Default destructor */ - ~NEPixelWiseMultiplicationKernel() = default; - /** Initialise the kernel's input, output and border mode. - * - * Valid configurations (Input1,Input2) -> Output : - * - * Support: Broadcast? Scale=1/255? - * - (U8,U8) -> U8, S16 N Y - * - (U8,S16) -> S16 N Y - * - (S16,U8) -> S16 N Y - * - (S16,S16) -> S16 N Y - * - (S32,S32) -> S32 Y N - * - (F16,F16) -> F16 N Y - * - (F32,F32) -> F32 Y Y - * - (QASYMM8,QASYMM8) -> QASYMM8 Y Y - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED Y Y - * - (QSYMM16,QSYMM16) -> QSYMM16, S32 N Y - * - * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported. - * For all other scale values only round to zero (implemented as round towards minus infinity) is supported. 
- * - * @param[in] input1 First input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 - * @param[in] input2 Second input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 - * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 - * @param[in] scale Scale to apply after multiplication. - * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. - * If both @p input1, @p input2 and @p output are of datatype S32, scale cannot be 1/255 - * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if any of the inputs is of quantized datatype - * @param[in] rounding_policy Rounding policy. - */ - void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); - /** Static function to check if given info will lead to a valid configuration of @ref NEPixelWiseMultiplicationKernel - * - * Valid configurations (Input1,Input2) -> Output : - * Support: Broadcast? Scale=1/255? - * - (U8,U8) -> U8, S16 N Y - * - (U8,S16) -> S16 N Y - * - (S16,U8) -> S16 N Y - * - (S16,S16) -> S16 N Y - * - (S32,S32) -> S32 Y N - * - (F16,F16) -> F16 N Y - * - (F32,F32) -> F32 Y Y - * - (QASYMM8,QASYMM8) -> QASYMM8 Y Y - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED Y Y - * - (QSYMM16,QSYMM16) -> QSYMM16, S32 N Y - * - * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported. - * For all other scale values only round to zero (implemented as round towards minus infinity) is supported. - * - * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 - * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 - * @param[in] output Output tensor info. 
Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 - * @param[in] scale Scale to apply after multiplication. - * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. - * If both @p input1, @p input2 and @p output are of datatype S32, scale cannot be 1/255 - * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if any of the inputs is of quantized datatype - * @param[in] rounding_policy Rounding policy. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); - - // Inherited methods overridden - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised multiplication functions with integer scaling factor - * - * @param[in] in1 Input1 tensor object. - * @param[in] in2 Input2 tensor object. - * @param[out] out Output tensor object. - * @param[in] window Region on which to execute the kernel - * @param[in] scale Integer scale factor. - */ - using MulFunctionInt = void(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, int scale); - /** Common signature for all the specialised multiplication functions with float scaling factor - * - * @param[in] in1 Input1 tensor object. - * @param[in] in2 Input2 tensor object. - * @param[out] out Output tensor object. - * @param[in] window Region on which to execute the kernel - * @param[in] scale Float scale factor. - */ - using MulFunctionFloat = void(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, float scale); - /** Common signature for all the specialised QASYMM8 multiplication functions with float scaling factor - * - * @param[in] in1 Input1 tensor object. - * @param[in] in2 Input2 tensor object. - * @param[out] out Output tensor object. 
- * @param[in] window Region on which to execute the kernel - * @param[in] scale Float scale factor. - * - */ - using MulFunctionQuantized = void(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, float scale); - - MulFunctionFloat *_func_float; - MulFunctionInt *_func_int; - MulFunctionQuantized *_func_quantized; - -private: - float _scale; - int _scale_exponent; -}; - -/** Interface for the complex pixelwise multiplication kernel. */ -class NEComplexPixelWiseMultiplicationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEComplexPixelWiseMultiplicationKernel"; - } - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input1 An input tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor). - * @param[in] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1. - * @param[out] output The output tensor, Data types supported: same as @p input1. Number of channels supported: same as @p input1. - */ - void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEComplexPixelWiseMultiplicationKernel - * - * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor). - * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. - * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; -}; - -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h deleted file mode 100644 index 2be25080cd..0000000000 --- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H -#define ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the pooling layer kernel */ -class NEPoolingLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEPoolingLayerKernel"; - } - /** Default constructor */ - NEPoolingLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPoolingLayerKernel(const NEPoolingLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPoolingLayerKernel &operator=(const NEPoolingLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEPoolingLayerKernel(NEPoolingLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEPoolingLayerKernel &operator=(NEPoolingLayerKernel &&) = default; - /** Default destructor */ - ~NEPoolingLayerKernel() = default; - /** Set the input and output tensors. - * - * @note F16 are supported for pool sizes 2 and 3 only - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32. - */ - void configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info, ITensor *indices = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref NEPoolingLayerKernel - * - * @note F16 are supported for pool sizes 2 and 3 only - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] output Destination tensor. Data types supported: Same as @p input. 
- * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Function to perform 2x2 pooling. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void pooling2_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform 2x2 pooling and compute the pooling indices. The indices can be used for max unpool. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - */ - void pooling2_f32_nhwc_maxpool_indices(const Window &window_input, const Window &window); - /** Function to perform MxN pooling for 32-bit floating point values. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void poolingMxN_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform MxN pooling for 32-bit floating point values (NHWC). 
- * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void poolingMxN_f32_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform 7x7 pooling. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void pooling7_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform 3x3 pooling. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void pooling3_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform 2x2 pooling for float16_t. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void pooling2_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform 2x2 pooling and compute the pooling indices for FP32/FP16. The indices can be used for max unpool. 
- * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - */ - template - void pooling2_nchw_maxpool_indices(const Window &window_input, const Window &window); - /** Function to perform 2x2 pooling and compute the pooling indices. The indices can be used for max unpool. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - */ - void pooling2_f16_nhwc_maxpool_indices(const Window &window_input, const Window &window); - /** Function to perform 3x3 pooling. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void pooling3_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform MxN pooling for 16-bit floating point values. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void poolingMxN_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform MxN pooling for 16-bit floating point values. (NHWC) - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. 
- */ - void poolingMxN_f16_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Template function to perform 2x2 pooling for 8bit quantized fixed point. (NCHW) - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - template - void pooling2_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Template function to perform 3x3 pooling for 8bit quantized fixed point. (NCHW) - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - template - void pooling3_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Template function to perform MxN pooling for 8-bit quantized. (NCHW) - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - template - void poolingMxN_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Template function to perform MxN pooling for 8-bit quantized. (NHWC) - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. 
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - template - void poolingMxN_q8_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Common signature for all the specialised Pooling functions - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - using PoolingFunction = void (NEPoolingLayerKernel::*)(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding); - -private: - PoolingFunction _func; - const ITensor *_input; - ITensor *_output; - ITensor *_indices; - PoolingLayerInfo _pool_info; - DataLayout _data_layout; - unsigned int _num_elems_processed_per_iteration; - BorderSize _border_size; - bool _is_square; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h b/arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h deleted file mode 100644 index 84db99100b..0000000000 --- a/arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H -#define ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to calculate prior boxes */ -class NEPriorBoxLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEPriorBoxLayerKernel"; - } - /** Default constructor */ - NEPriorBoxLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPriorBoxLayerKernel(const NEPriorBoxLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPriorBoxLayerKernel &operator=(const NEPriorBoxLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEPriorBoxLayerKernel(NEPriorBoxLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEPriorBoxLayerKernel &operator=(NEPriorBoxLayerKernel &&) = default; - /** Set the input and output tensors. - * - * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC. - * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1 - * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input - * @param[in] info Prior box layer info. - */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output, const PriorBoxLayerInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref NEPriorBoxLayerKernel - * - * @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC. - * @param[in] input2 Second source tensor info. Data types and layouts supported: same as @p input1 - * @param[in] output Destination tensor info. Output dimensions are [W * H * num_priors * 4, 2]. 
Data type supported: same as @p input - * @param[in] info Prior box layer info. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Stores the coordinates of the calculated prior boxes. - * - * @param[out] out Output pointer. - * @param[in] offset Output offset to write to. - * @param[in] center_x Center pixel value on x-axis. - * @param[in] center_y Center pixel value on y-axis. - * @param[in] box_width Prior box width. - * @param[in] box_height Prior box height. - * @param[in] width Input width. - * @param[in] height Input height. - */ - void store_coordinates(float *out, const int offset, const float center_x, const float center_y, const float box_width, const float box_height, const int width, const int height); - /** Function to calculate prior boxes. - * - * @param[in] window Input region on which to execute the kernel. - */ - void calculate_prior_boxes(const Window &window); - - const ITensor *_input1; - const ITensor *_input2; - ITensor *_output; - PriorBoxLayerInfo _info; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h b/arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h deleted file mode 100644 index 86c9e1d3af..0000000000 --- a/arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H -#define ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform layer normalization */ -class NEQLSTMLayerNormalizationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEQLSTMLayerNormalizationKernel"; - } - /** Default constructor */ - NEQLSTMLayerNormalizationKernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEQLSTMLayerNormalizationKernel(const NEQLSTMLayerNormalizationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEQLSTMLayerNormalizationKernel &operator=(const NEQLSTMLayerNormalizationKernel &) = delete; - /** Default Move Constructor. */ - NEQLSTMLayerNormalizationKernel(NEQLSTMLayerNormalizationKernel &&) = default; - /** Default move assignment operator */ - NEQLSTMLayerNormalizationKernel &operator=(NEQLSTMLayerNormalizationKernel &&) = default; - /** Default destructor */ - ~NEQLSTMLayerNormalizationKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: QSYMM16. - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] weight Weight tensor. Data types supported: Same as @p input. - * @param[in] bias Bias tensor. Data types supported: S32 - */ - void configure(const ITensor *input, ITensor *output, const ITensor *weight, const ITensor *bias); - /** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayerNormalizationKernel - * - * @param[in] input Source tensor info. Data types supported: QSYMM16. - * @param[in] output Destination tensor info. Data types supported: Same as @p input. - * @param[in] weight Weight tensor info. Data types supported: Same as @p input. 
- * @param[in] bias Bias tensor info. Data types supported: S32 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias); - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - // constants - static constexpr uint32_t max_input_dimension{ 2 }; /**< The maximum input dimension supported */ - static constexpr uint32_t max_weight_dimension{ 1 }; /**< The maximum weight dimension supported */ - static constexpr uint32_t max_bias_dimension{ 1 }; /**< The maximum bias dimension supported */ - static constexpr uint32_t vector_size_byte{ 16 }; /**< Computation vector size in byte */ - - using ComputeFuncType = std::function; - - ComputeFuncType _fn{}; /**< Function pointer to computation function */ - - const ITensor *_input{ nullptr }; /**< Input tensor */ - const ITensor *_weight{ nullptr }; /**< Weight tensor */ - const ITensor *_bias{ nullptr }; /**< Bias tensor */ - ITensor *_output{ nullptr }; /**< Output tensor */ - - int32_t _output_multiplier{}; /**< Multiplier for output values */ - int32_t _output_shift{}; /**< Shift value for output values */ - - int32_t _window_start_x{}; /**< The beginning of x-axis iteration */ - int32_t _window_end_x{}; /**< The end of x-axis iteration */ - int32_t _window_step_x{}; /**< The size of x-axis iteration's step */ - - Window _inout_window{}; /**< Window for input and output tensor */ - Window _weight_window{}; /**< Window for weight and bias tensor */ - - /** Function to configure initial windows for destination of computation - * - * @param[in] Target destination tensor to use for output window - * - * @return configured window - */ - Window configure_window(ITensor *target); - // Function to compute for data type QSYMM16 - void compute_qsymm16(); - /** Function to compute summation and summation of squared input of the given input pointer - * - * @param[in] 
Input_ptr pointer to input array - * - */ - std::pair sum_qsymm16(const int16_t *input_ptr); - /** Function to normalize values using computed mean and standard deviation - * - * @param[in] input_ptr Pointer to input array - * @param[in] output_ptr Pointer to output array - * @param[in] weight_ptr Pointer to weight array - * @param[in] bias_ptr Pointer to bias array - * @param[in] mean Mean value - * @param[in] inv_std_mul Quantized multiplier for standard deviation - * @param[in] inv_std_shift Shift for standard deviation - * - */ - void normalize_qasymm16(const int16_t *input_ptr, - int16_t *output_ptr, - const int16_t *weight_ptr, - const int32_t *bias_ptr, - int32_t mean, int32_t inv_std_mul, int32_t inv_std_shift); - /** Function to compute output quantization information */ - QuantizationInfo compute_output_qinfo(); -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h deleted file mode 100644 index d35e027ff5..0000000000 --- a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the quantization layer kernel. - * - * @note The implementation supports only 3D input tensors - * - */ -class NEQuantizationLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEQuantizationLayerKernel"; - } - /** Default constructor */ - NEQuantizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEQuantizationLayerKernel(const NEQuantizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEQuantizationLayerKernel &operator=(const NEQuantizationLayerKernel &) = delete; - /** Default Move Constructor. */ - NEQuantizationLayerKernel(NEQuantizationLayerKernel &&) = default; - /** Default move assignment operator */ - NEQuantizationLayerKernel &operator=(NEQuantizationLayerKernel &&) = default; - /** Default destructor */ - ~NEQuantizationLayerKernel() = default; - /** Set the input, output. - * - * @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. - * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. 
- * - * @note Output auto initialization is not supported by this kernel - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayerKernel - * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. - * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised @ref NEQuantizationLayerKernel functions - * - * @param[in] window Region on which to execute the kernel. - */ - using QuantizationFunctionExecutorPtr = void (NEQuantizationLayerKernel::*)(const Window &window); - /** Function to apply QASYMM8 or QASYMM8_SIGNED quantization on a tensor. - * - * @param[in] window Region on which to execute the kernel. - */ - template - void run_quantize_qasymm8(const Window &window); - /** Function to apply QASYMM16 quantization on a tensor. - * - * @param[in] window Region on which to execute the kernel. - */ - template - void run_quantize_qasymm16(const Window &window); - - const ITensor *_input; - ITensor *_output; - - QuantizationFunctionExecutorPtr _func; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h b/arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h deleted file mode 100644 index 66ebb5e261..0000000000 --- a/arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H -#define ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the RoIAlign kernel. - */ -class NEROIAlignLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEROIAlignLayerKernel"; - } - - /** Constructor */ - NEROIAlignLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEROIAlignLayerKernel(const NEROIAlignLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEROIAlignLayerKernel &operator=(const NEROIAlignLayerKernel &) = delete; - /** Default Move Constructor. 
*/ - NEROIAlignLayerKernel(NEROIAlignLayerKernel &&) = default; - /** Default move assignment operator. */ - NEROIAlignLayerKernel &operator=(NEROIAlignLayerKernel &&) = default; - /** Default destructor */ - ~NEROIAlignLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32. - * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner - * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. - * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8, otherwise same as @p input - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. - * - * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled - * width and pooled height. - * @note The z dimensions of @p output tensor and @p input tensor must be the same. - * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. - */ - void configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info); - /** Static function to check if given info will lead to a valid configuration of @ref NEROIAlignLayerKernel - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32. - * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8, - * otherwise same as @p input - * @param[in] output Destination tensor info. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. 
- * - * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled - * width and pooled height. - * @note The z dimensions of @p output tensor and @p input tensor must be the same. - * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. - * - * @return a Status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - template - void internal_run(const Window &window, const ThreadInfo &info); - - const ITensor *_input; - ITensor *_output; - const ITensor *_rois; - ROIPoolingLayerInfo _pool_info; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H*/ diff --git a/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h deleted file mode 100644 index fa9685bc6b..0000000000 --- a/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H -#define ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#include "arm_compute/core/IArray.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the ROI pooling layer kernel */ -class NEROIPoolingLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEROIPoolingLayerKernel"; - } - /** Default constructor */ - NEROIPoolingLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEROIPoolingLayerKernel(const NEROIPoolingLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEROIPoolingLayerKernel &operator=(const NEROIPoolingLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEROIPoolingLayerKernel(NEROIPoolingLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEROIPoolingLayerKernel &operator=(NEROIPoolingLayerKernel &&) = default; - /** Default destructor */ - ~NEROIPoolingLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: F32. - * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner - * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. 
Data types supported: U16 - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. - * - * @note The x and y dimensions of @p output tensor must be the same as that specified by @p pool_info 's pooled - * width and pooled height. - * @note The z dimensions of @p output tensor and @p input tensor must be the same. - * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois tensor. - */ - void configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - const ITensor *_rois; - ITensor *_output; - ROIPoolingLayerInfo _pool_info; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NERangeKernel.h b/arm_compute/core/NEON/kernels/NERangeKernel.h deleted file mode 100644 index 84ebd53b1b..0000000000 --- a/arm_compute/core/NEON/kernels/NERangeKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NERANGEKERNEL_H -#define ARM_COMPUTE_NERANGEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Kernel class for Range - * - * range generates a 1-D tensor containing a sequence of numbers that begins at 'start' and extends by increments - * of 'step' up to but not including 'end'. - */ -class NERangeKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NERangeKernel"; - } - /** Default constructor */ - NERangeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NERangeKernel(const NERangeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NERangeKernel &operator=(const NERangeKernel &) = delete; - /** Allow instances of this class to be moved */ - NERangeKernel(NERangeKernel &&) = default; - /** Allow instances of this class to be moved */ - NERangeKernel &operator=(NERangeKernel &&) = default; - /** Default destructor */ - ~NERangeKernel() = default; - /** Initialize the kernel's output tensor, start, end and step of the sequence. - * - * @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. - * @param[in] start The starting value of the sequence. - * @param[in] end The ending (not including) value of the sequence. 
- * @param[in] step The gap between each pair of values in the sequence. - */ - void configure(ITensor *output, float start, float end, float step); - /** Static function to check if given info will lead to a valid configuration of @ref NERangeKernel - * - * @param[in] output Output tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. - * @param[in] start The starting value of the sequence. - * @param[in] end The ending (not including) value of the sequence. - * @param[in] step The gap between each pair of values in the sequence. - * - * @return a status - */ - static Status validate(const ITensorInfo *output, float start, float end, float step); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - using RangeFunction = void(ITensor *output, float start, float step, const Window &window); - - RangeFunction *_func; /**< Range function to be called */ - float _start; /**< Start of sequence */ - float _end; /**< End of sequence */ - float _step; /**< Increment/step value */ - ITensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NERANGEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h b/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h deleted file mode 100644 index b913ea4e90..0000000000 --- a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H -#define ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a reduction operation - * - * @note For ARG_MIN/ARG_MAX reduction, the default data type for an uninitialized - * output tensor is signed 32-bit integer (S32). It is the user's responsibility - * to check that the results do not overflow because the indices are computed - * in unsigned 32-bit (U32). 
- */ -class NEReductionOperationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEReductionOperationKernel"; - } - /** Default constructor */ - NEReductionOperationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEReductionOperationKernel(const NEReductionOperationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEReductionOperationKernel &operator=(const NEReductionOperationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEReductionOperationKernel(NEReductionOperationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEReductionOperationKernel &operator=(NEReductionOperationKernel &&) = default; - /** Default destructor */ - ~NEReductionOperationKernel() = default; - - /** Set the source, destination of the kernel - * - * @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. - * @param[out] output Destination tensor.Data types and data layouts supported: same as @p input, S32 for ARG_MIX/ARG_MAX. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 - * @param[in] op Reduction operation to perform. - */ - void configure(const ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op); - - /** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperationKernel. - * - * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. - * @param[in] output Destination tensor info.Data types and data layouts supported: same as @p input, S32 for ARG_MIX/ARG_MAX. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. 
Supported reduction axis : 0 - * @param[in] op Reduction operation to perform. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - unsigned int _reduction_axis; - ReductionOperation _op; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NERemapKernel.h b/arm_compute/core/NEON/kernels/NERemapKernel.h deleted file mode 100644 index 34c80a38d9..0000000000 --- a/arm_compute/core/NEON/kernels/NERemapKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEREMAPKERNEL_H -#define ARM_COMPUTE_NEREMAPKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a remap on a tensor */ -class NERemapKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NERemapKernel"; - } - /** Default constructor */ - NERemapKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NERemapKernel(const NERemapKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NERemapKernel &operator=(const NERemapKernel &) = delete; - /** Allow instances of this class to be moved */ - NERemapKernel(NERemapKernel &&) = default; - /** Allow instances of this class to be moved */ - NERemapKernel &operator=(NERemapKernel &&) = default; - /** Default destructor */ - ~NERemapKernel() = default; - - /** Initialize the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[in] map_x Map for X coordinates. Data type supported: F32. - * @param[in] map_y Map for Y coordinates. Data type supported: F32. - * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. - * @param[in] policy The interpolation type. 
- */ - void configure(const ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** function to perform nearest interpolation on the given window */ - void remap_nearest(const Window &window); - /** function to perform bilinear interpolation on the given window */ - void remap_bilinear(const Window &window); - /** Remap function to use for the particular interpolation type passed to configure() */ - void (NERemapKernel::*_func)(const Window &window); - - const ITensor *_input; /**< Input image */ - ITensor *_output; /**< Output image */ - const ITensor *_map_x; /**< Input remap x coordinates */ - const ITensor *_map_y; /**< Input remap y coordinates */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEREMAPKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h b/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h deleted file mode 100644 index d751a6b24c..0000000000 --- a/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEREORGLAYERKERNEL_H -#define ARM_COMPUTE_NEREORGLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the kernel to perform tensor re-organization */ -class NEReorgLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEReorgLayerKernel"; - } - /** Default constructor */ - NEReorgLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEReorgLayerKernel(const NEReorgLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEReorgLayerKernel &operator=(const NEReorgLayerKernel &) = delete; - /** Default Move Constructor. */ - NEReorgLayerKernel(NEReorgLayerKernel &&) = default; - /** Default move assignment operator */ - NEReorgLayerKernel &operator=(NEReorgLayerKernel &&) = default; - /** Default destructor */ - ~NEReorgLayerKernel() = default; - /** Set the input and output of the kernel - * - * @param[in] input Source tensor. Data type supported: All - * @param[out] output Destination tensor. Data type supported: Same as @p input - * @param[in] stride Stride to be used during data re-organization. 
- * It defines the spatial distance between 2 consecutive pixels in the x and y direction - */ - void configure(const ITensor *input, ITensor *output, int32_t stride); - - /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayerKernel - * - * @param[in] input Source tensor info. Data type supported: All - * @param[in] output Destination tensor info. Data type supported: Same as @p input - * @param[in] stride Stride to be used during data re-organization - * It defines the spatial distance between 2 consecutive pixels in the x and y direction - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t stride); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - int32_t _stride; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEREORGLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h b/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h deleted file mode 100644 index a4b8426e41..0000000000 --- a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NERESHAPELAYERKERNEL_H -#define ARM_COMPUTE_NERESHAPELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the kernel to perform tensor reshaping */ -class NEReshapeLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEReshapeLayerKernel"; - } - /** Set the input and output info of the kernel - * - * @param[in] input Source tensor info. Data type supported: All - * @param[out] output Destination tensor info. Data type supported: Same as @p input - */ - void configure(const ITensorInfo *input, ITensorInfo *output); - - /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayerKernel - * - * @param[in] input Source tensor info. Data type supported: All - * @param[in] output Destination tensor info. 
Data type supported: Same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NERESHAPELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEReverseKernel.h b/arm_compute/core/NEON/kernels/NEReverseKernel.h deleted file mode 100644 index fda79154a0..0000000000 --- a/arm_compute/core/NEON/kernels/NEReverseKernel.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEREVERSEKERNEL_H -#define ARM_COMPUTE_NEREVERSEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the reverse layer kernel. */ -class NEReverseKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEReverseKernel"; - } - /** Default constructor */ - NEReverseKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEReverseKernel(const NEReverseKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEReverseKernel &operator=(const NEReverseKernel &) = delete; - /** Allow instances of this class to be moved */ - NEReverseKernel(NEReverseKernel &&) = default; - /** Allow instances of this class to be moved */ - NEReverseKernel &operator=(NEReverseKernel &&) = default; - /** Default destructor */ - ~NEReverseKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. Data type supported: Same as @p input - * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 - */ - void configure(const ITensor *input, ITensor *output, const ITensor *axis); - - /** Static function to check if given info will lead to a valid configuration of @ref NEReverseKernel - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] output Output tensor info. Data type supported: Same as @p input - * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. 
Data type supported: U32 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - const ITensor *_axis; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEREVERSEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEScaleKernel.h b/arm_compute/core/NEON/kernels/NEScaleKernel.h deleted file mode 100644 index b35bb72741..0000000000 --- a/arm_compute/core/NEON/kernels/NEScaleKernel.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NESCALEKERNEL_H -#define ARM_COMPUTE_NESCALEKERNEL_H - -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform scaling on a tensor */ -class NEScaleKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEScaleKernel"; - } - /** Default constructor */ - NEScaleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEScaleKernel(const NEScaleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEScaleKernel &operator=(const NEScaleKernel &) = delete; - /** Allow instances of this class to be moved */ - NEScaleKernel(NEScaleKernel &&) = default; - /** Allow instances of this class to be moved */ - NEScaleKernel &operator=(NEScaleKernel &&) = default; - /** Default destructor */ - ~NEScaleKernel() = default; - - /** Initialise the kernel's inputs, output and interpolation policy - * - * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor - * @note Using @p policy Area only supports data layout NCHW and input data type U8. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. - * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32 - * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32 - * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32. - * @param[out] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. 
scaling is only performed within the XY-plane. - * @param[in] info @ref ScaleKernelInfo to use for configuration - */ - void configure(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets, ITensor *output, - const ScaleKernelInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref NEScaleKernel - * - * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor - * @note Using @p policy Area only supports data layout NCHW and input data type U8. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. - * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32 - * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32 - * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32. - * @param[in] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. - * @param[in] info @ref ScaleKernelInfo to use for validation - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *dx, const ITensorInfo *dy, const ITensorInfo *offsets, ITensorInfo *output, - const ScaleKernelInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** function to perform scale using area interpolation on the given window - * - * @note Used only in case down-sampling. 
- */ - void scale_area_nchw_u8(const Window &window); - - /** function to perform scale using bilinear interpolation on the given window */ - template - void scale_bilinear_nchw(const Window &window); - /** function to perform scale using bilinear interpolation on the given window */ - template - void scale_bilinear_nhwc(const Window &window); - /** function to perform scale using bilinear interpolation on the given window */ - template - void scale_bilinear_qasymm(const Window &window); - - /** function to perform scale using nearest neighbour on the given window */ - template - void scale_nearest_nchw(const Window &window); - /** function to perform scale using nearest neighbour on the given window */ - template - void scale_nearest_nhwc(const Window &window); - - /** Scale function to use for the particular function to use */ - using ScaleFunctionPtr = void (NEScaleKernel::*)(const Window &window); - - ScaleFunctionPtr _func; - const ITensor *_offsets; - const ITensor *_dx; - const ITensor *_dy; - const ITensor *_input; - ITensor *_output; - InterpolationPolicy _policy; - BorderMode _border_mode; - PixelValue _constant_border_value; - float _sampling_offset; - bool _align_corners; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESCALEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h b/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h deleted file mode 100644 index 7e1fdb5d9e..0000000000 --- a/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESCHARR3x3KERNEL_H -#define ARM_COMPUTE_NESCHARR3x3KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to run a 3x3 Scharr filter on a tensor. 
- * -* @f[ -* \mathbf{G}_x=\begin{vmatrix} -* -3 & 0 & +3\\ -* -10& 0 & +10\\ -* -3 & 0 & +3 -* \end{vmatrix} -* @f] -*/ -class NEScharr3x3Kernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEScharr3x3Kernel"; - } - /** Default constructor */ - NEScharr3x3Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEScharr3x3Kernel(const NEScharr3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEScharr3x3Kernel &operator=(const NEScharr3x3Kernel &) = delete; - /** Allow instances of this class to be moved */ - NEScharr3x3Kernel(NEScharr3x3Kernel &&) = default; - /** Allow instances of this class to be moved */ - NEScharr3x3Kernel &operator=(NEScharr3x3Kernel &&) = default; - /** Default destructor */ - ~NEScharr3x3Kernel() = default; - - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - bool _run_scharr_x; /**< Do we need to run Scharr X ? */ - bool _run_scharr_y; /**< Do we need to run Scharr Y ? 
*/ - const ITensor *_input; /**< Input tensor */ - ITensor *_output_x; /**< Output tensor for scharr X */ - ITensor *_output_y; /**< Output tensor for scharr Y */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESCHARR3x3KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NESelectKernel.h b/arm_compute/core/NEON/kernels/NESelectKernel.h deleted file mode 100644 index bb8695f598..0000000000 --- a/arm_compute/core/NEON/kernels/NESelectKernel.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NESELECTKERNEL_H -#define ARM_COMPUTE_NESELECTKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the select kernel - * - * Select is computed by: - * @f[ output(i) = condition(i) ? x(i) : y(i) @f] - * - */ -class NESelectKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESelectKernel"; - } - /** Default constructor */ - NESelectKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESelectKernel(const NESelectKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESelectKernel &operator=(const NESelectKernel &) = delete; - /** Allow instances of this class to be moved */ - NESelectKernel(NESelectKernel &&) = default; - /** Allow instances of this class to be moved */ - NESelectKernel &operator=(NESelectKernel &&) = default; - /** Default destructor */ - ~NESelectKernel() = default; - - /** Common signature for all the specialised elementwise functions - * - * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: All. - * @param[out] y Second input tensor. Data types supported: Same as @p x - * @param[in] output Output tensor. Data types supported: Same as @p x - */ - void configure(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output); - - /** Validate the argument passed to the kernel - * - * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: All. - * @param[in] y Second input tensor. Data types supported: Same as @p x - * @param[in] output Output tensor. Data types supported: Same as @p x. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *c, const ITensorInfo *x, const ITensorInfo *y, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised select functions - * - * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: All. - * @param[in] y Second input tensor. Data types supported: Same as @p x - * @param[in] output Output tensor. Data types supported: Same as @p x. - */ - using SelectFunction = void(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window); - - /** Select function to use for the particular tensor types passed to configure() */ - SelectFunction *_function; - const ITensor *_c; /**< Condition tensor */ - const ITensor *_x; /**< Source tensor 1 */ - const ITensor *_y; /**< Source tensor 2 */ - ITensor *_output; /**< Destination tensor */ - bool _has_same_rank; /**< Flag that indicates if condition tensor and other inputs have the same rank */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NESELECTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h b/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h deleted file mode 100644 index 66a13c4c26..0000000000 --- a/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOBEL3x3KERNEL_H -#define ARM_COMPUTE_NESOBEL3x3KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to run a 3x3 Sobel X filter on a tensor. 
- * - * @f[ - * \mathbf{G}_x=\begin{vmatrix} - * -1 & 0 & +1\\ - * -2 & 0 & +2\\ - * -1 & 0 & +1 - * \end{vmatrix} - * @f] -*/ -class NESobel3x3Kernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESobel3x3Kernel"; - } - /** Default constructor */ - NESobel3x3Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel3x3Kernel(const NESobel3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel3x3Kernel &operator=(const NESobel3x3Kernel &) = delete; - /** Allow instances of this class to be moved */ - NESobel3x3Kernel(NESobel3x3Kernel &&) = default; - /** Allow instances of this class to be moved */ - NESobel3x3Kernel &operator=(NESobel3x3Kernel &&) = default; - /** Default destructor */ - ~NESobel3x3Kernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - bool _run_sobel_x; /**< Do we need to run Sobel X ? */ - bool _run_sobel_y; /**< Do we need to run Sobel Y ? 
*/ - const ITensor *_input; /**< Input tensor */ - ITensor *_output_x; /**< Output tensor for sobel X */ - ITensor *_output_y; /**< Output tensor for sobel Y */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESOBEL3x3KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h b/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h deleted file mode 100644 index 02029b6a47..0000000000 --- a/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOBEL5x5KERNEL_H -#define ARM_COMPUTE_NESOBEL5x5KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor. 
- * - */ -class NESobel5x5HorKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESobel5x5HorKernel"; - } - /** Default constructor */ - NESobel5x5HorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel5x5HorKernel(const NESobel5x5HorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel5x5HorKernel &operator=(const NESobel5x5HorKernel &) = delete; - /** Allow instances of this class to be moved */ - NESobel5x5HorKernel(NESobel5x5HorKernel &&) = default; - /** Allow instances of this class to be moved */ - NESobel5x5HorKernel &operator=(NESobel5x5HorKernel &&) = default; - /** Default destructor */ - ~NESobel5x5HorKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @note At least one of output_x or output_y must be set - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - const ITensor *_input; /**< Input tensor */ - ITensor *_output_x; /**< X output of horizontal pass */ - ITensor *_output_y; /**< Y output of horizontal pass */ - bool _run_sobel_x; /**< Do we need to run Sobel X? */ - bool _run_sobel_y; /**< Do we need to run Sobel Y? */ - BorderSize _border_size; /**< Border size */ -}; - -/** Interface for the kernel to run the vertical pass of 5x5 Sobel Y filter on a tensor. 
- * -*/ -class NESobel5x5VertKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESobel5x5VertKernel"; - } - /** Default constructor */ - NESobel5x5VertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel5x5VertKernel(const NESobel5x5VertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel5x5VertKernel &operator=(const NESobel5x5VertKernel &) = delete; - /** Allow instances of this class to be moved */ - NESobel5x5VertKernel(NESobel5x5VertKernel &&) = default; - /** Allow instances of this class to be moved */ - NESobel5x5VertKernel &operator=(NESobel5x5VertKernel &&) = default; - /** Default destructor */ - ~NESobel5x5VertKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input_x Input for X (X output of hor pass). Data type supported: S16. - * @param[in] input_y Input for Y (Y output of hor pass). Data type supported: S16. - * @param[out] output_x Destination tensor for the X gradient. Data type supported: S16. - * @param[out] output_y Destination tensor for the Y gradient. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(ITensor *input_x, ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - ITensor *_input_x; /**< X input (X output of the hor pass) */ - ITensor *_input_y; /**< Y input (Y output of the hor pass) */ - ITensor *_output_x; /**< X output of sobel */ - ITensor *_output_y; /**< Y output of sobel */ - bool _run_sobel_x; /**< Do we need to run sobel X? */ - bool _run_sobel_y; /**< Do we need to run sobel Y? 
*/ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESOBEL5x5KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h b/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h deleted file mode 100644 index 0e8b82c96a..0000000000 --- a/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOBEL7x7KERNEL_H -#define ARM_COMPUTE_NESOBEL7x7KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor. 
- * - */ -class NESobel7x7HorKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESobel7x7HorKernel"; - } - /** Default constructor */ - NESobel7x7HorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel7x7HorKernel(const NESobel7x7HorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel7x7HorKernel &operator=(const NESobel7x7HorKernel &) = delete; - /** Allow instances of this class to be moved */ - NESobel7x7HorKernel(NESobel7x7HorKernel &&) = default; - /** Allow instances of this class to be moved */ - NESobel7x7HorKernel &operator=(NESobel7x7HorKernel &&) = default; - /** Default destructor */ - ~NESobel7x7HorKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S32. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S32. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - const ITensor *_input; /**< Input tensor */ - ITensor *_output_x; /**< X output of horizontal pass */ - ITensor *_output_y; /**< Y output of horizontal pass */ - bool _run_sobel_x; /**< Do we need to run Sobel X? */ - bool _run_sobel_y; /**< Do we need to run Sobel Y? */ - BorderSize _border_size; /**< Border size */ -}; - -/** Interface for the kernel to run the vertical pass of 7x7 Sobel Y filter on a tensor. 
- * -*/ -class NESobel7x7VertKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESobel7x7VertKernel"; - } - /** Default constructor */ - NESobel7x7VertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel7x7VertKernel(const NESobel7x7VertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel7x7VertKernel &operator=(const NESobel7x7VertKernel &) = delete; - /** Allow instances of this class to be moved */ - NESobel7x7VertKernel(NESobel7x7VertKernel &&) = default; - /** Allow instances of this class to be moved */ - NESobel7x7VertKernel &operator=(NESobel7x7VertKernel &&) = default; - /** Default destructor */ - ~NESobel7x7VertKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @note At least one of output_x or output_y must be set - * @note If output_x is set then input_x must be set too - * @note If output_y is set then input_y must be set too - * - * @param[in] input_x (Optional) Input for X (X output of hor pass). Data type supported: S32. - * @param[in] input_y (Optional) Input for Y (Y output of hor pass). Data type supported: S32. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S32. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S32. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ITensor *input_x, const ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - const ITensor *_input_x; /**< X input (X output of the hor pass) */ - const ITensor *_input_y; /**< Y input (Y output of the hor pass) */ - ITensor *_output_x; /**< X output of sobel */ - ITensor *_output_y; /**< Y output of sobel */ - bool _run_sobel_x; /**< Do we need to run sobel X? */ - bool _run_sobel_y; /**< Do we need to run sobel Y? */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESOBEL7x7KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h b/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h deleted file mode 100644 index e80cd222c5..0000000000 --- a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H -#define ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the identifying the max value of 1D Logits */ -class NELogits1DMaxKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NELogits1DMaxKernel"; - } - /** Default constructor */ - NELogits1DMaxKernel(); - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor. Data types supported: same as @p input - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DMaxKernel - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] output Destination tensor. Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - using Logits1DMaxFunction = void(const ITensor &in, ITensor &out, const Window &window); - -private: - Logits1DMaxFunction *_func; - BorderSize _border_size; -}; - -/** Interface for softmax computation for QASYMM8 with pre-computed max. 
*/ -template -class NELogits1DSoftmaxKernel : public INEKernel -{ -public: - const char *name() const override - { - if(IS_LOG) - { - return "NELogits1DSoftmaxKernel"; - } - else - { - return "NELogits1DLogSoftmaxKernel"; - } - } - /** Default constructor */ - NELogits1DSoftmaxKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELogits1DSoftmaxKernel(const NELogits1DSoftmaxKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELogits1DSoftmaxKernel &operator=(const NELogits1DSoftmaxKernel &) = delete; - /** Allow instances of this class to be moved */ - NELogits1DSoftmaxKernel(NELogits1DSoftmaxKernel &&) = default; - /** Allow instances of this class to be moved */ - NELogits1DSoftmaxKernel &operator=(NELogits1DSoftmaxKernel &&) = default; - /** Default destructor */ - ~NELogits1DSoftmaxKernel() = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] max Max values tensor. Same shape as input with dimension 0 set to 1. - * Data types supported: same as @p input. - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] beta A scaling factor for the exponent. - * - * @param tmp Auxiliary tensor. Must be type F32 and same shape as the input. - */ - void configure(const ITensor *input, const ITensor *max, ITensor *output, const float beta, ITensor *tmp); - /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DSoftmaxKernel - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] max Max values tensor info. Same shape as input with dimension 0 set to 1. - * Data types supported: same as @p input. - * @param[in] output Destination tensor info. Data types supported: same as @p input. 
- * @param[in] beta A scaling factor for the exponent. - * @param[in] tmp Tensor info of auxiliary. Must be type F32 and same shape as the input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *max, - const ITensorInfo *output, const float beta, const ITensorInfo *tmp); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - using LogitsSoftmaxFunction = void(const ITensor &in, const ITensor &max, void *const tmp, ITensor &out, const float beta, - const Window &window); - - LogitsSoftmaxFunction *_func; - const ITensor *_input; - const ITensor *_max; - ITensor *_output; - float _beta; - ITensor *_tmp; //Temporary. Used internally -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h b/arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h deleted file mode 100644 index b5d7c692f0..0000000000 --- a/arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H -#define ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declaration -class ITensor; - -/** Interface for the space to batch kernel */ -class NESpaceToBatchLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESpaceToBatchLayerKernel"; - } - /** Default constructor */ - NESpaceToBatchLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESpaceToBatchLayerKernel(const NESpaceToBatchLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESpaceToBatchLayerKernel &operator=(const NESpaceToBatchLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NESpaceToBatchLayerKernel(NESpaceToBatchLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NESpaceToBatchLayerKernel &operator=(NESpaceToBatchLayerKernel &&) = default; - /** Default destructor */ - ~NESpaceToBatchLayerKernel() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[in] paddings 2-D tensor with shape [2, M]. 
Data types supported: S32 - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const ITensor *input, const ITensor *block_shape, const ITensor *paddings, ITensor *output); - /** Initialise the kernel's input and output. (Static block shape and paddings) - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[in] padding_left The left padding of the output tensor. - * @param[in] padding_right The right padding of the output tensor. - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayerKernel - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 - * @param[in] output Tensor output. Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayerKernel (Static block shape and paddings) - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[in] padding_left The left padding of the output tensor. - * @param[in] padding_right The right padding of the output tensor. - * @param[in] output Tensor output. 
Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; /**< Source tensor */ - const ITensor *_block_shape; /**< Block shape tensor */ - const ITensor *_paddings; /**< Paddings tensor */ - ITensor *_output; /**< Destination tensor */ - DataLayout _data_layout; /**< Data layout to be used at run-time */ - - Size2D _padding_left; - int _block_shape_x; - int _block_shape_y; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h b/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h deleted file mode 100644 index 11443e02c5..0000000000 --- a/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H -#define ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the space to depth kernel */ -class NESpaceToDepthLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESpaceToDepthLayerKernel"; - } - /** Default constructor */ - NESpaceToDepthLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESpaceToDepthLayerKernel(const NESpaceToDepthLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESpaceToDepthLayerKernel &operator=(const NESpaceToDepthLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NESpaceToDepthLayerKernel(NESpaceToDepthLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NESpaceToDepthLayerKernel &operator=(NESpaceToDepthLayerKernel &&) = default; - /** Default destructor */ - ~NESpaceToDepthLayerKernel() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[out] output Tensor output. Data types supported: same as @p input - * @param[in] block_shape Block shape value - */ - void configure(const ITensor *input, ITensor *output, int32_t block_shape); - /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToDepthLayerKernel - * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. 
- * @param[in] output Tensor output info. Data types supported: same as @p input - * @param[in] block_shape Block shape value - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; /**< Source tensor */ - ITensor *_output; /**< Destination tensor */ - int32_t _block_shape; /**< Block shape */ - DataLayout _data_layout; /**< Data layout of the operation */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEStackLayerKernel.h b/arm_compute/core/NEON/kernels/NEStackLayerKernel.h deleted file mode 100644 index 710a6be7f4..0000000000 --- a/arm_compute/core/NEON/kernels/NEStackLayerKernel.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_NESTACKLAYERKERNEL_H -#define ARM_COMPUTE_NESTACKLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to stacks a rank-R tensor into one with rank-(R+1) along the axis dimension.*/ -class NEStackLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEStackLayerKernel"; - } - /** Default constructor */ - NEStackLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEStackLayerKernel(const NEStackLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEStackLayerKernel &operator=(const NEStackLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEStackLayerKernel(NEStackLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEStackLayerKernel &operator=(NEStackLayerKernel &&) = default; - /** Default destructor */ - ~NEStackLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @note Supported input tensor rank: up to 4 - * - * @param[in] input Input tensor. Data types supported: All - * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. - * @param[in] idx_input Index of the input tensor in the list of tensors to stack. - * All tensors in the list must have the same shape - * @param[in] num_tensors Number of tensors to stack - * @param[out] output Output tensor. Data types supported: Same as @p input. 
- * - */ - void configure(const ITensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEStackLayerKernel - * - * @note Supported input tensor rank: up to 4 - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. - * @param[in] idx_input Index of the input tensor in the list of tensors to stack - * All tensors in the list must have the same shape - * @param[in] num_tensors Number of tensors to stack - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output); - - // Inherited methods overridden - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - unsigned int _axis; - unsigned int _idx_input; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NESTACKLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h b/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h deleted file mode 100644 index be55fd75de..0000000000 --- a/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H -#define ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -#include - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the kernel to perform tensor strided slicing */ -class NEStridedSliceKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEStridedSliceKernel"; - } - /** Default constructor */ - NEStridedSliceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEStridedSliceKernel(const NEStridedSliceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEStridedSliceKernel &operator=(const NEStridedSliceKernel &) = delete; - /** Allow instances of this class to be moved */ - NEStridedSliceKernel(NEStridedSliceKernel &&) = default; - /** Allow instances of this class to be moved */ - NEStridedSliceKernel &operator=(NEStridedSliceKernel &&) = default; - /** Default destructor */ - ~NEStridedSliceKernel() = default; - /** Configure kernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] input Source tensor info. Data type supported: All - * @param[out] output Destination tensor info. Data type supported: Same as @p input - * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. 
- * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. - * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. - * A slice of size 1 starting from starts[i] in the dimension must be preserved. - */ - void configure(const ITensorInfo *input, ITensorInfo *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); - - /** Static function to check if given info will lead to a valid configuration of @ref NEStridedSliceKernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] input Source tensor info. Data type supported: All - * @param[in] output Destination tensor info. Data type supported: Same as @p input - * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. - * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. - * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. - * A slice of size 1 starting from starts[i] in the dimension must be preserved. 
- */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; - -private: - Coordinates _starts_abs; /**< Absolute start coordinates */ - Coordinates _final_strides; /**< Final strides */ - int32_t _shrink_mask; /**< Shrink axis mask */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NETableLookupKernel.h b/arm_compute/core/NEON/kernels/NETableLookupKernel.h deleted file mode 100644 index 58bfdbeec2..0000000000 --- a/arm_compute/core/NEON/kernels/NETableLookupKernel.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NETABLELOOKUPKERNEL_H -#define ARM_COMPUTE_NETABLELOOKUPKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; -class ILut; - -/** Interface for the kernel to perform table lookup calculations. */ -class NETableLookupKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NETableLookupKernel"; - } - /** Default constructor */ - NETableLookupKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NETableLookupKernel(const NETableLookupKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NETableLookupKernel &operator=(const NETableLookupKernel &) = delete; - /** Allow instances of this class to be moved */ - NETableLookupKernel(NETableLookupKernel &&) = default; - /** Allow instances of this class to be moved */ - NETableLookupKernel &operator=(NETableLookupKernel &&) = default; - /** Initialise the kernel's input, lut and output. - * - * @param[in] input An input tensor. Data types supported: U8/S16. - * @param[in] lut The input LUT. - * @param[out] output The output tensor. Data types supported: same as @p input - */ - void configure(const ITensor *input, const ILut *lut, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Perform table lookup on a given window. - * - * @param window window Region on which to execute the kernel. 
- */ - template - void tableLookup(const Window &window); - /** Common signature for all the specialised lut functions - * - * @param[in] window Region on which to execute the kernel. - */ - using TableLookupFunction = void (NETableLookupKernel::*)(const Window &window); - /** Sub function to use for the particular tensor types passed to configure() */ - TableLookupFunction _func; - const ILut *_lut; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NETABLELOOKUPKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEThresholdKernel.h b/arm_compute/core/NEON/kernels/NEThresholdKernel.h deleted file mode 100644 index daad47dbda..0000000000 --- a/arm_compute/core/NEON/kernels/NEThresholdKernel.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NETHRESHOLDKERNEL_H -#define ARM_COMPUTE_NETHRESHOLDKERNEL_H - -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the thresholding kernel */ -class NEThresholdKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEThresholdKernel"; - } - /** Constructor - * Initialize all the pointers to nullptr and parameters to zero. - */ - NEThresholdKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEThresholdKernel(const NEThresholdKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEThresholdKernel &operator=(const NEThresholdKernel &) = delete; - /** Initialise the kernel's input, output and threshold parameters. - * - * @param[in] input An input tensor. Data type supported: U8 - * @param[out] output The output tensor. Data type supported: U8. - * @param[in] info Threshold kernel descriptor - */ - void configure(const ITensor *input, ITensor *output, const ThresholdKernelInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref NEThresholdKernel - * - * @param[in] input Input tensor info. Data type supported: U8 - * @param[in] output Output tensor info. 
Data type supported: U8 - * @param[in] info Threshold kernel descriptor - * - * @return A status containing an error code in case of failure - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ThresholdKernelInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** run binary thresholding on the given window */ - void run_binary(const Window &window); - /** run range thresholding on the given window */ - void run_range(const Window &window); - - void (NEThresholdKernel::*_func)(const Window &window); - - const ITensor *_input; /**< Input */ - ITensor *_output; /**< Output */ - ThresholdKernelInfo _info; /**< Threshold descriptor */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NETHRESHOLDKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NETileKernel.h b/arm_compute/core/NEON/kernels/NETileKernel.h deleted file mode 100644 index 7a3039adc9..0000000000 --- a/arm_compute/core/NEON/kernels/NETileKernel.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NETILEKERNEL_H -#define ARM_COMPUTE_NETILEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a tile operation */ -class NETileKernel : public INEKernel -{ -public: - /** Default constructor */ - NETileKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NETileKernel(const NETileKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NETileKernel &operator=(const NETileKernel &) = delete; - /** Allow instances of this class to be moved */ - NETileKernel(NETileKernel &&) = default; - /** Allow instances of this class to be moved */ - NETileKernel &operator=(NETileKernel &&) = default; - const char *name() const override - { - return "NETileKernel"; - } - /** Set the source, destination of the kernel - * - * @param[in] input Source tensor. Data type supported: All. - * @param[out] output Destination tensor. Same as @p input - * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. - */ - void configure(const ITensor *input, ITensor *output, const Multiples &multiples); - /** Static function to check if given info will lead to a valid configuration of @ref NETileKernel - * - * @param[in] input Source tensor info. Data type supported: All. - * @param[in] output Destination tensor info. Same as @p input - * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Multiples &multiples); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NETILEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NETransposeKernel.h b/arm_compute/core/NEON/kernels/NETransposeKernel.h deleted file mode 100644 index 1507a1c1a4..0000000000 --- a/arm_compute/core/NEON/kernels/NETransposeKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NETRANSPOSEKERNEL_H -#define ARM_COMPUTE_NETRANSPOSEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel which transposes the elements of a matrix. - * - * [width, height, batch] -> [height, width, batch] - * - */ -class NETransposeKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NETransposeKernel"; - } - /** Default constructor */ - NETransposeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NETransposeKernel(const NETransposeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NETransposeKernel &operator=(const NETransposeKernel &) = delete; - /** Allow instances of this class to be moved */ - NETransposeKernel(NETransposeKernel &&) = default; - /** Allow instances of this class to be moved */ - NETransposeKernel &operator=(NETransposeKernel &&) = default; - /** Default destructor */ - ~NETransposeKernel() = default; - - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. Data type supported: Same as @p input - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NETransposeKernel - * - * @param[in] input Input tensor. Data types supported: All - * @param[in] output Output tensor. Data type supported: Same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the transpose functions - * - * @param[in] input An input tensor. Data types supported: All - * @param[out] output The output tensor. 
Data type supported: same as @p input - * @param[in] window Region on which to execute the kernel. - */ - using TransposeFunction = void(const ITensor *input, ITensor *output, const Window &window); - /** Transpose function to use for the particular tensor types passed to configure() */ - TransposeFunction *_func; - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NETRANSPOSEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h b/arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h deleted file mode 100644 index a1278ea307..0000000000 --- a/arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H -#define ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the Upsample layer kernel.*/ -class NEUpsampleLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEUpsampleLayerKernel"; - } - /** Default constructor */ - NEUpsampleLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEUpsampleLayerKernel(const NEUpsampleLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEUpsampleLayerKernel &operator=(const NEUpsampleLayerKernel &) = delete; - /** Default Move Constructor. */ - NEUpsampleLayerKernel(NEUpsampleLayerKernel &&) = default; - /** Default move assignment operator */ - NEUpsampleLayerKernel &operator=(NEUpsampleLayerKernel &&) = default; - /** Default destructor */ - ~NEUpsampleLayerKernel() = default; - /** Set the input output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] info Contains stride information described in @ref Size2D. - * @param[in] policy Defines the policy to fill the intermediate pixels. - * - */ - void configure(const ITensor *input, ITensor *output, const Size2D &info, const InterpolationPolicy policy); - /** Static function to check if given info will lead to a valid configuration of @ref NEUpsampleLayerKernel - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] output Destination tensor info. Data types supported: same as @p input. - * @param[in] info Contains stride information described in @ref Size2D. - * @param[in] policy Defines the policy to fill the intermediate pixels. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info, const InterpolationPolicy policy); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Function to run upsample layer (NCHW) - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void upsample_nchw(const Window &window); - /** Function to run upsample layer (NHWC) - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void upsample_nhwc(const Window &window); - - using UpsampleFunctionPtr = void (NEUpsampleLayerKernel::*)(const Window &window); - -private: - UpsampleFunctionPtr _func; - const ITensor *_input; - ITensor *_output; - Size2D _info; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEWarpKernel.h b/arm_compute/core/NEON/kernels/NEWarpKernel.h deleted file mode 100644 index 21fc7b2df1..0000000000 --- a/arm_compute/core/NEON/kernels/NEWarpKernel.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEWARPKERNEL_H -#define ARM_COMPUTE_NEWARPKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -#include -#include -namespace arm_compute -{ -class ITensor; - -/** Common interface for warp affine and warp perspective */ -class INEWarpKernel : public INEKernel -{ -public: - /** Default constructor */ - INEWarpKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - INEWarpKernel(const INEWarpKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - INEWarpKernel &operator=(const INEWarpKernel &) = delete; - /** Allow instances of this class to be moved */ - INEWarpKernel(INEWarpKernel &&) = default; - /** Allow instances of this class to be moved */ - INEWarpKernel &operator=(INEWarpKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data type supported: U8. - * @param[in] matrix The perspective or affine matrix to use. Must be 2x3 for affine and 3x3 for perspective of type float. - * The matrix argument requires 9 values, for the affine case the last 3 values are ignored. - * @param[in] border_mode Strategy to use for borders - * @param[in] constant_border_value Constant value used for filling the border. 
- */ - virtual void configure(const ITensor *input, ITensor *output, const std::array &matrix, BorderMode border_mode, uint8_t constant_border_value); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - - // Inherited methods overridden: - BorderSize border_size() const override; - -protected: - /** function to perform warp affine or warp perspective on the given window when border mode == UNDEFINED - * - * @param[in] window Region on which to execute the kernel - */ - virtual void warp_undefined(const Window &window) = 0; - /** function to perform warp affine or warp perspective on the given window when border mode == CONSTANT - * - * @param[in] window Region on which to execute the kernel - */ - virtual void warp_constant(const Window &window) = 0; - /** function to perform warp affine or warp perspective on the given window when border mode == REPLICATE - * - * @param[in] window Region on which to execute the kernel - */ - virtual void warp_replicate(const Window &window) = 0; - /** Common signature for all the specialised warp functions - * - * @param[in] window Region on which to execute the kernel. - */ - void (INEWarpKernel::*_func)(const Window &window); - - const ITensor *_input; /**< Input Tensor */ - ITensor *_output; /**< Output Tensor */ - uint8_t _constant_border_value; /**< Constant value used for filling the border. This value is used for those pixels out of the ROI when the border mode is CONSTANT */ - std::array _matrix; /**< The affine or perspective matrix. Must be 2x3 for warp affine or 3x3 for warp perspective of type float. 
*/ -}; - -/** Template interface for the kernel to compute warp affine - * - */ -template -class NEWarpAffineKernel : public INEWarpKernel -{ -private: - const char *name() const override - { - return "NEWarpAffineKernel"; - } - // Inherited methods overridden: - void warp_undefined(const Window &window) override; - void warp_constant(const Window &window) override; - void warp_replicate(const Window &window) override; -}; - -/** Template interface for the kernel to compute warp perspective - * - */ -template -class NEWarpPerspectiveKernel : public INEWarpKernel -{ -private: - const char *name() const override - { - return "NEWarpPerspectiveKernel"; - } - // Inherited methods overridden: - void warp_undefined(const Window &window) override; - void warp_constant(const Window &window) override; - void warp_replicate(const Window &window) override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEWARPKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h deleted file mode 100644 index 8cb3ed8796..0000000000 --- a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H -#define ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform reshaping on the weights used by convolution and locally connected layer - * - * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels. - * In combination with the @ref NEIm2ColKernel can transform a convolution to a matrix multiplication. - * - * For example assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have: - * @f[ - * \left( \begin{array}{ccc} - * a000 & a001 & a002 \\ - * a010 & a011 & a012 \\ - * a020 & a021 & a022 \\ - * \end{array} \right) - * \left( \begin{array}{ccc} - * a100 & a101 & a102 \\ - * a110 & a111 & a112 \\ - * a120 & a121 & a122 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccccccccc} - * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\ - * \end{array} \right) - * @f] - */ -class NEWeightsReshapeKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEWeightsReshapeKernel"; - } - /** Constructor.*/ - NEWeightsReshapeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEWeightsReshapeKernel(const NEWeightsReshapeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class 
contains pointers) */ - NEWeightsReshapeKernel &operator=(const NEWeightsReshapeKernel &) = delete; - /** Allow instances of this class to be moved */ - NEWeightsReshapeKernel(NEWeightsReshapeKernel &&) = default; - /** Allow instances of this class to be moved */ - NEWeightsReshapeKernel &operator=(NEWeightsReshapeKernel &&) = default; - /** Default destructor */ - ~NEWeightsReshapeKernel() = default; - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, - * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. - * Data types supported: All - * @param[in] bias The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with - * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input - * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. - * @param[out] output The output tensor. Data types supported: Same as @p input - */ - void configure(const ITensor *input, const ITensor *bias, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEWeightsReshapeKernel - * - * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, - * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. - * Data types supported: All - * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with - * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input - * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. - * @param[in] output The output tensor. Should be a 2D Tensor. 
Data types supported: Same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - const ITensor *_bias; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h deleted file mode 100644 index 64d741deab..0000000000 --- a/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H -#define ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the width concatenate kernel. - * The input tensor will be concatenated into the output tensor. - */ -class NEWidthConcatenateLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEWidthConcatenateLayerKernel"; - } - /** Default constructor */ - NEWidthConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEWidthConcatenateLayerKernel(const NEWidthConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEWidthConcatenateLayerKernel &operator=(const NEWidthConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEWidthConcatenateLayerKernel(NEWidthConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEWidthConcatenateLayerKernel &operator=(NEWidthConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~NEWidthConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] width_offset The offset on the X axis. - * @param[in,out] output Output tensor info. Data types supported: Same as @p input. - */ - void configure(const ITensorInfo *input, unsigned int width_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEWidthConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] width_offset The offset on the X axis. - * @param[in] output Output tensor info. Data types supported: Same as @p input. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; - -private: - unsigned int _width_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h b/arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h deleted file mode 100644 index 8795e4aa56..0000000000 --- a/arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEYOLOLAYERKERNEL_H -#define ARM_COMPUTE_NEYOLOLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the YOLO layer kernel. */ -class NEYOLOLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEYOLOLayerKernel"; - } - /** Constructor */ - NEYOLOLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEYOLOLayerKernel(const NEYOLOLayerKernel &) = delete; - /** Default move constructor */ - NEYOLOLayerKernel(NEYOLOLayerKernel &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEYOLOLayerKernel &operator=(const NEYOLOLayerKernel &) = delete; - /** Default move assignment operator */ - NEYOLOLayerKernel &operator=(NEYOLOLayerKernel &&) = default; - /** Default destructor */ - ~NEYOLOLayerKernel() = default; - /** Set the input and output tensor. - * - * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place - * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result - * of the activation function. Data types supported: F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] act_info Activation layer parameters. - * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) - */ - void configure(ITensor *input, ITensor *output, const ActivationLayerInfo &act_info, int32_t num_classes); - /** Static function to check if given info will lead to a valid configuration of @ref NEYOLOLayerKernel - * - * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result - * of the activation function. Data types supported: F16/F32. - * @param[in] output Destination tensor info. 
Data type supported: same as @p input - * @param[in] act_info Activation layer information. - * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Function to run YOLO layer - * - * @param[in] window Region on which to execute the kernel. - */ - template - void yolo_layer_nchw(const Window &window); - /** Function to run YOLO layer on tensors with NHWC format - * - * @param[in] window Region on which to execute the kernel. - */ - template - void yolo_layer_nhwc(const Window &window); - /** Common signature for all the yolo layer functions - * - * @param[in] window Region on which to execute the kernel. - */ - using YOLOFunctionPtr = void (NEYOLOLayerKernel::*)(const Window &window); - -private: - YOLOFunctionPtr _func; - ITensor *_input; - ITensor *_output; - ActivationLayerInfo _act_info; - int32_t _num_classes; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEYOLOLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/assembly/arm_gemm_local.hpp b/arm_compute/core/NEON/kernels/assembly/arm_gemm_local.hpp deleted file mode 100644 index de92cce653..0000000000 --- a/arm_compute/core/NEON/kernels/assembly/arm_gemm_local.hpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2018 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#pragma once - -/* This file is used to configure integration-specific aspects of arm_gemm into ACL */ - -#include "arm_compute/core/CPP/CPPTypes.h" - -namespace arm_gemm -{ -using CPUModel = arm_compute::CPUModel; -using CPUInfo = arm_compute::CPUInfo; -} // namespace arm_compute - - - diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 42e42cc2d6..306bdc6706 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -2246,5 +2246,14 @@ struct IOFormatInfo /** Align columns */ bool align_columns; }; + +/** Internal keypoint class for Lucas-Kanade Optical Flow */ +struct NELKInternalKeypoint +{ + float x{ 0.f }; /**< x coordinate of the keypoint */ + float y{ 0.f }; /**< y coordinate of the keypoint */ + bool tracking_status{ false }; /**< the tracking status of the keypoint */ +}; + } // namespace arm_compute #endif /* ARM_COMPUTE_TYPES_H */ diff --git a/arm_compute/core/utils/misc/Traits.h b/arm_compute/core/utils/misc/Traits.h index 58fb1bff59..933922f63c 100644 --- a/arm_compute/core/utils/misc/Traits.h +++ b/arm_compute/core/utils/misc/Traits.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_UTILS_TRAITS_TRAITS_H #define ARM_COMPUTE_UTILS_TRAITS_TRAITS_H +#include "arm_compute/core/Types.h" #include namespace arm_compute diff --git a/arm_compute/runtime/CL/functions/CLHarrisCorners.h b/arm_compute/runtime/CL/functions/CLHarrisCorners.h index 90d8c8873f..326a895d39 100644 --- a/arm_compute/runtime/CL/functions/CLHarrisCorners.h +++ b/arm_compute/runtime/CL/functions/CLHarrisCorners.h @@ -29,13 +29,13 @@ #include "arm_compute/core/CL/ICLArray.h" #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h" -#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" +#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" +#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" - #include #include diff --git a/arm_compute/runtime/IOperator.h b/arm_compute/runtime/IOperator.h index e7952bb748..0097383115 100644 --- a/arm_compute/runtime/IOperator.h +++ b/arm_compute/runtime/IOperator.h @@ -24,6 +24,8 @@ #ifndef ARM_COMPUTE_IOPERATOR_H #define ARM_COMPUTE_IOPERATOR_H +#include "arm_compute/core/ITensorPack.h" +#include "arm_compute/core/experimental/Types.h" #include "arm_compute/runtime/IOperator.h" #include "arm_compute/runtime/IRuntimeContext.h" #include "arm_compute/runtime/Types.h" diff --git a/arm_compute/runtime/ITransformWeights.h b/arm_compute/runtime/ITransformWeights.h index 2e2e764c8e..9392be05e5 100644 --- a/arm_compute/runtime/ITransformWeights.h +++ b/arm_compute/runtime/ITransformWeights.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,6 +25,7 @@ #define ARM_COMPUTE_ITRANSFORMWEIGHTS_H #include +#include namespace arm_compute { @@ -124,4 +125,4 @@ protected: }; } // arm_compute -#endif /*ARM_COMPUTE_ITRANSFORMWEIGHTS_H */ \ No newline at end of file +#endif /*ARM_COMPUTE_ITRANSFORMWEIGHTS_H */ diff --git a/arm_compute/runtime/NEON/INEOperator.h b/arm_compute/runtime/NEON/INEOperator.h index 415e767eec..a5ffc74940 100644 --- a/arm_compute/runtime/NEON/INEOperator.h +++ b/arm_compute/runtime/NEON/INEOperator.h @@ -25,7 +25,6 @@ #define ARM_COMPUTE_INEOPERATOR_H #include "../../core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/runtime/IOperator.h" #include "arm_compute/runtime/IRuntimeContext.h" #include "arm_compute/runtime/Types.h" @@ -34,6 +33,8 @@ namespace arm_compute { +class ICPPKernel; +using INEKernel = ICPPKernel; namespace experimental { /** Basic interface for functions which have a single async NEON kernel */ @@ -53,6 +54,8 @@ public: INEOperator &operator=(const INEOperator &) = delete; /** Default move assignment operator */ INEOperator &operator=(INEOperator &&) = default; + /** Default destructor */ + ~INEOperator(); // Inherited methods overridden: void run(ITensorPack &tensors) override; diff --git a/arm_compute/runtime/NEON/INESimpleFunction.h b/arm_compute/runtime/NEON/INESimpleFunction.h index 7f2ed2e16f..979a0f7f07 100644 --- a/arm_compute/runtime/NEON/INESimpleFunction.h +++ b/arm_compute/runtime/NEON/INESimpleFunction.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,27 +24,38 @@ #ifndef ARM_COMPUTE_INESIMPLEFUNCTION_H #define ARM_COMPUTE_INESIMPLEFUNCTION_H -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/runtime/IFunction.h" #include namespace arm_compute { +class ICPPKernel; +class NEFillBorderKernel; +using INEKernel = ICPPKernel; /** Basic interface for functions which have a single NEON kernel */ class INESimpleFunction : public IFunction { public: /** Constructor */ INESimpleFunction(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INESimpleFunction(const INESimpleFunction &) = delete; + /** Default move constructor */ + INESimpleFunction(INESimpleFunction &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INESimpleFunction &operator=(const INESimpleFunction &) = delete; + /** Default move assignment operator */ + INESimpleFunction &operator=(INESimpleFunction &&) = default; + /** Default destructor */ + ~INESimpleFunction(); // Inherited methods overridden: void run() override final; protected: - std::unique_ptr _kernel; /**< Kernel to run */ - NEFillBorderKernel _border_handler; /**< Kernel to handle image borders */ + std::unique_ptr _kernel; /**< Kernel to run */ + std::unique_ptr _border_handler; /**< Kernel to handle image borders */ }; } #endif /*ARM_COMPUTE_INESIMPLEFUNCTION_H */ diff --git a/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h b/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h index 7d352eb82b..9df0d78526 100644 --- a/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h +++ b/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_INESIMPLEFUNCTIONNOBORDER_H #define ARM_COMPUTE_INESIMPLEFUNCTIONNOBORDER_H -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IRuntimeContext.h" @@ -32,6 +31,8 @@ namespace arm_compute { +class ICPPKernel; +using INEKernel = ICPPKernel; /** Basic interface for functions which have a single NEON kernel and no border */ class INESimpleFunctionNoBorder : public IFunction { @@ -49,6 +50,8 @@ public: INESimpleFunctionNoBorder &operator=(const INESimpleFunctionNoBorder &) = delete; /** Default move assignment operator */ INESimpleFunctionNoBorder &operator=(INESimpleFunctionNoBorder &&) = default; + /** Default destructor */ + ~INESimpleFunctionNoBorder(); // Inherited methods overridden: void run() override final; diff --git a/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h b/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h index 7b35e6db9e..df7dc2d980 100644 --- a/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h +++ b/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,7 +24,7 @@ #ifndef ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H #define ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { @@ -35,9 +35,21 @@ class ITensor; * @note The image data type for the inputs must be U8 or S16 * @note The function calculates the absolute difference also when the 2 inputs have different image data types */ -class NEAbsoluteDifference : public INESimpleFunction +class NEAbsoluteDifference : public INESimpleFunctionNoBorder { public: + /** Default constructor */ + NEAbsoluteDifference() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAbsoluteDifference(const NEAbsoluteDifference &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAbsoluteDifference &operator=(const NEAbsoluteDifference &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAbsoluteDifference(NEAbsoluteDifference &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAbsoluteDifference &operator=(NEAbsoluteDifference &&) = delete; + /** Default destructor */ + ~NEAbsoluteDifference(); /** Set the inputs and output images * * @param[in] input1 Source tensor. Data types supported: U8/S16. diff --git a/arm_compute/runtime/NEON/functions/NEAccumulate.h b/arm_compute/runtime/NEON/functions/NEAccumulate.h index f403a7772b..6dcef09f10 100644 --- a/arm_compute/runtime/NEON/functions/NEAccumulate.h +++ b/arm_compute/runtime/NEON/functions/NEAccumulate.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -36,6 +36,18 @@ class ITensor; class NEAccumulate : public INESimpleFunctionNoBorder { public: + /** Default constructor */ + NEAccumulate() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulate(const NEAccumulate &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulate &operator=(const NEAccumulate &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAccumulate(NEAccumulate &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAccumulate &operator=(NEAccumulate &&) = delete; + /** Default destructor */ + ~NEAccumulate(); /** Set the input and accumulation tensors * * @param[in] input Source tensor. Data type supported: U8. @@ -48,6 +60,18 @@ public: class NEAccumulateWeighted : public INESimpleFunctionNoBorder { public: + /** Default constructor */ + NEAccumulateWeighted() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateWeighted(const NEAccumulateWeighted &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateWeighted &operator=(const NEAccumulateWeighted &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAccumulateWeighted(NEAccumulateWeighted &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAccumulateWeighted &operator=(NEAccumulateWeighted &&) = delete; + /** Default destructor */ + ~NEAccumulateWeighted(); /** Set the input and accumulation tensors, and the scale value * * @param[in] input Source tensor. Data type supported: U8. 
@@ -62,6 +86,18 @@ public: class NEAccumulateSquared : public INESimpleFunctionNoBorder { public: + /** Default constructor */ + NEAccumulateSquared() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateSquared(const NEAccumulateSquared &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateSquared &operator=(const NEAccumulateSquared &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAccumulateSquared(NEAccumulateSquared &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAccumulateSquared &operator=(NEAccumulateSquared &&) = delete; + /** Default destructor */ + ~NEAccumulateSquared(); /** Set the input and accumulation tensors and the shift value. * * @param[in] input Source tensor. Data type supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h index cfece5c392..3f410fcd8c 100644 --- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h @@ -46,8 +46,6 @@ public: * @param[in] ctx Runtime context to be used by the function */ NEActivationLayer(IRuntimeContext *ctx = nullptr); - /** Destructor */ - ~NEActivationLayer(); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEActivationLayer(const NEActivationLayer &) = delete; /** Default move constructor */ @@ -56,6 +54,8 @@ public: NEActivationLayer &operator=(const NEActivationLayer &) = delete; /** Default move assignment operator */ NEActivationLayer &operator=(NEActivationLayer &&); + /** Destructor */ + ~NEActivationLayer(); /** [NEActivationLayer snippet] **/ /** Set the input and output tensor. 
* @@ -93,6 +93,19 @@ namespace experimental class NEActivationLayer : public INEOperator { public: + /** Constructor */ + NEActivationLayer() = default; + /** Prevent instances of this class from being copied */ + NEActivationLayer(const NEActivationLayer &) = delete; + /** Default move constructor */ + NEActivationLayer(NEActivationLayer &&) = default; + /** Prevent instances of this class from being copied */ + NEActivationLayer &operator=(const NEActivationLayer &) = delete; + /** Default move assignment operator */ + NEActivationLayer &operator=(NEActivationLayer &&) = default; + /** Destructor */ + ~NEActivationLayer(); + /** Set the input and output tensor. * * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. diff --git a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h index 61762f37e1..4b13d1f44e 100644 --- a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h +++ b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h @@ -52,6 +52,16 @@ class NEArgMinMaxLayer : public IFunction public: /** Constructor */ NEArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArgMinMaxLayer(const NEArgMinMaxLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArgMinMaxLayer &operator=(const NEArgMinMaxLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEArgMinMaxLayer(NEArgMinMaxLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEArgMinMaxLayer &operator=(NEArgMinMaxLayer &&) = delete; + /** Default destructor */ + ~NEArgMinMaxLayer(); /** Set the input and output tensors. * * @param[in] input Input source tensor. 
Data types supported: QASYMM8_SIGNED/QASYMM8/S32/F16/F32. diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h index e10771ef4b..6aaa5ff4f7 100644 --- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h +++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h @@ -38,6 +38,18 @@ namespace experimental class NEArithmeticAddition : public INEOperator { public: + /** Constructor */ + NEArithmeticAddition() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticAddition(const NEArithmeticAddition &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticAddition &operator=(const NEArithmeticAddition &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEArithmeticAddition(NEArithmeticAddition &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEArithmeticAddition &operator=(NEArithmeticAddition &&) = delete; + /** Default destructor */ + ~NEArithmeticAddition(); /** Initialise the kernel's inputs, output and conversion policy. * * Valid configurations (Input1,Input2) -> Output : diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h index 1f77164a43..6d56a267a7 100644 --- a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,14 +24,16 @@ #ifndef ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H #define ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H -#include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" +#include <memory> + namespace arm_compute { class ITensor; +class NEBatchNormalizationLayerKernel; /** Basic function to run @ref NENormalizationLayerKernel and simulate a batch normalization layer. * @@ -42,8 +44,18 @@ class ITensor; class NEBatchNormalizationLayer : public IFunction { public: - /** Default constructor */ + /** Constructor */ NEBatchNormalizationLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchNormalizationLayer(const NEBatchNormalizationLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchNormalizationLayer &operator=(const NEBatchNormalizationLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBatchNormalizationLayer(NEBatchNormalizationLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBatchNormalizationLayer &operator=(NEBatchNormalizationLayer &&) = delete; + /** Default destructor */ + ~NEBatchNormalizationLayer(); /** Set the input and output tensors. 
* * @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place @@ -85,7 +97,7 @@ public: void run() override; private: - NEBatchNormalizationLayerKernel _norm_kernel; /**< Batch normalization layer kernel */ + std::unique_ptr<NEBatchNormalizationLayerKernel> _norm_kernel; /**< Batch normalization layer kernel */ }; } #endif /* ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h index 1a6ffa9506..c2fd26d34c 100644 --- a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h +++ b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,18 +26,30 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEBatchToSpaceLayerKernel. 
*/ class NEBatchToSpaceLayer : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEBatchToSpaceLayer() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchToSpaceLayer(const NEBatchToSpaceLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchToSpaceLayer &operator=(const NEBatchToSpaceLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBatchToSpaceLayer(NEBatchToSpaceLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBatchToSpaceLayer &operator=(NEBatchToSpaceLayer &&) = delete; + /** Default destructor */ + ~NEBatchToSpaceLayer() = default; /** Set the input and output tensors. * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h index c612a146ac..3203d2b9a7 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -34,6 +34,18 @@ class ITensor; class NEBitwiseAnd : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEBitwiseAnd() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseAnd(const NEBitwiseAnd &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseAnd &operator=(const NEBitwiseAnd &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBitwiseAnd(NEBitwiseAnd &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBitwiseAnd &operator=(NEBitwiseAnd &&) = delete; + /** Default destructor */ + ~NEBitwiseAnd() = default; /** Initialise the kernel's inputs and output * * @param[in] input1 First tensor input. Data type supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h index f6ef975dc7..9fa0d38caf 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h index 8fc4b0d362..fba6b784de 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h index 20e23af234..c6cb584284 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h index 14d5de4ca4..de8dfef4ed 100644 --- a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h +++ b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h @@ -24,19 +24,20 @@ #ifndef ARM_COMPUTE_NEBOUNDINGBOXTRANSOFORM_H #define ARM_COMPUTE_NEBOUNDINGBOXTRANSOFORM_H -#include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEBoundingBoxTransformKernel. * * This function calls the following Neon kernels: * -# @ref NEBoundingBoxTransformKernel */ -class NEBoundingBoxTransform : public INESimpleFunction +class NEBoundingBoxTransform : public INESimpleFunctionNoBorder { public: /** Set the input and output tensors. diff --git a/arm_compute/runtime/NEON/functions/NEBox3x3.h b/arm_compute/runtime/NEON/functions/NEBox3x3.h index 80cd5084ab..4d8b12684b 100644 --- a/arm_compute/runtime/NEON/functions/NEBox3x3.h +++ b/arm_compute/runtime/NEON/functions/NEBox3x3.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NECannyEdge.h b/arm_compute/runtime/NEON/functions/NECannyEdge.h index f171c3bed0..b08646de0d 100644 --- a/arm_compute/runtime/NEON/functions/NECannyEdge.h +++ b/arm_compute/runtime/NEON/functions/NECannyEdge.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_NECANNYEDGE_H #define ARM_COMPUTE_NECANNYEDGE_H -#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -38,6 +36,10 @@ namespace arm_compute { class ITensor; +class NEGradientKernel; +class NEFillBorderKernel; +class NEEdgeNonMaxSuppressionKernel; +class NEEdgeTraceKernel; /** Basic function to execute canny edge on NEON. This function calls the following NEON kernels and functions: * @@ -64,6 +66,8 @@ public: NECannyEdge(const NECannyEdge &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NECannyEdge &operator=(const NECannyEdge &) = delete; + /** Default destructor */ + ~NECannyEdge(); /** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode. * * @param[in, out] input Source tensor. Data type supported: U8. 
(Written to only for @p border_mode != UNDEFINED) @@ -81,19 +85,19 @@ public: void run() override; private: - MemoryGroup _memory_group; /**< Function's memory group */ - std::unique_ptr _sobel; /**< Pointer to Sobel kernel */ - std::unique_ptr _gradient; /**< Gradient kernel */ - NEEdgeNonMaxSuppressionKernel _non_max_suppr; /**< Non-Maxima suppression kernel */ - NEEdgeTraceKernel _edge_trace; /**< Edge tracing kernel */ - NEFillBorderKernel _border_mag_gradient; /**< Fill border on magnitude tensor kernel */ - NEFillBorderKernel _border_edge_trace; /**< Fill border before edge trace */ - Tensor _gx; /**< Source tensor - Gx component */ - Tensor _gy; /**< Source tensor - Gy component */ - Tensor _magnitude; /**< Source tensor - Magnitude */ - Tensor _phase; /**< Source tensor - Phase */ - Tensor _nonmax; /**< Source tensor - Non-Maxima suppressed */ - ITensor *_output; /**< Output tensor provided by the user. */ + MemoryGroup _memory_group; /**< Function's memory group */ + std::unique_ptr _sobel; /**< Pointer to Sobel kernel */ + std::unique_ptr _gradient; /**< Gradient kernel */ + std::unique_ptr _non_max_suppr; /**< Non-Maxima suppression kernel */ + std::unique_ptr _edge_trace; /**< Edge tracing kernel */ + std::unique_ptr _border_mag_gradient; /**< Fill border on magnitude tensor kernel */ + std::unique_ptr _border_edge_trace; /**< Fill border before edge trace */ + Tensor _gx; /**< Source tensor - Gx component */ + Tensor _gy; /**< Source tensor - Gy component */ + Tensor _magnitude; /**< Source tensor - Magnitude */ + Tensor _phase; /**< Source tensor - Phase */ + Tensor _nonmax; /**< Source tensor - Non-Maxima suppressed */ + ITensor *_output; /**< Output tensor provided by the user. 
*/ }; } #endif /* ARM_COMPUTE_NECANNYEDGE_H */ diff --git a/arm_compute/runtime/NEON/functions/NECast.h b/arm_compute/runtime/NEON/functions/NECast.h index ca818bea27..e536317660 100644 --- a/arm_compute/runtime/NEON/functions/NECast.h +++ b/arm_compute/runtime/NEON/functions/NECast.h @@ -25,16 +25,17 @@ #define ARM_COMPUTE_NECAST_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEDepthConvertLayerKernel. * This function ignores the scale and zeroPoint of quanized tensors,so QASYMM8 input is treated as uint8 values. */ -class NECast : public INESimpleFunction +class NECast : public INESimpleFunctionNoBorder { public: /** Initialize the function's source, destination diff --git a/arm_compute/runtime/NEON/functions/NEChannelCombine.h b/arm_compute/runtime/NEON/functions/NEChannelCombine.h index c4ced62e72..44a0504824 100644 --- a/arm_compute/runtime/NEON/functions/NEChannelCombine.h +++ b/arm_compute/runtime/NEON/functions/NEChannelCombine.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEChannelExtract.h b/arm_compute/runtime/NEON/functions/NEChannelExtract.h index 54059e91e1..4b6383d6b1 100644 --- a/arm_compute/runtime/NEON/functions/NEChannelExtract.h +++ b/arm_compute/runtime/NEON/functions/NEChannelExtract.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h index f31518e85b..aa11396c20 100644 --- a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h +++ b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,12 +24,14 @@ #ifndef ARM_COMPUTE_NECHANNELSHUFFLELAYER_H #define ARM_COMPUTE_NECHANNELSHUFFLELAYER_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NEChannelShuffleLayerKernel * diff --git a/arm_compute/runtime/NEON/functions/NECol2Im.h b/arm_compute/runtime/NEON/functions/NECol2Im.h index e03ec42c4f..69459a83c1 100644 --- a/arm_compute/runtime/NEON/functions/NECol2Im.h +++ b/arm_compute/runtime/NEON/functions/NECol2Im.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,12 +26,13 @@ #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/core/Error.h" #include "arm_compute/core/Size2D.h" -#include "arm_compute/core/Types.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NECol2Im */ class NECol2Im : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEColorConvert.h b/arm_compute/runtime/NEON/functions/NEColorConvert.h index b4c4158804..545550c04a 100644 --- a/arm_compute/runtime/NEON/functions/NEColorConvert.h +++ b/arm_compute/runtime/NEON/functions/NEColorConvert.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h b/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h index 44f3f860cf..b63243fec6 100644 --- a/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h +++ b/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,19 +24,20 @@ #ifndef ARM_COMPUTE_NECOMPUTEALLANCHORS_H #define ARM_COMPUTE_NECOMPUTEALLANCHORS_H -#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEComputeAllAnchorsKernel. * * This function calls the following NEON kernels: * -# @ref NEComputeAllAnchorsKernel */ -class NEComputeAllAnchors : public INESimpleFunction +class NEComputeAllAnchors : public INESimpleFunctionNoBorder { public: /** Set the input and output tensors. 
diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h index 82b4517dd3..fd35d0bc46 100644 --- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h @@ -26,7 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INEOperator.h" #include "support/Requires.h" @@ -106,8 +105,18 @@ namespace experimental class NEConcatenation : public INEOperator { public: - /** Default constructor */ + /** Constructor */ NEConcatenation(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConcatenation(const NEConcatenation &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConcatenation &operator=(const NEConcatenation &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConcatenation(NEConcatenation &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConcatenation &operator=(NEConcatenation &&) = delete; + /** Default destructor */ + ~NEConcatenation() = default; /** Initialise the kernel's inputs vector and output. * * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. 
@@ -135,9 +144,9 @@ public: void run(ITensorPack &tensors) override; private: - std::vector> _concat_kernels; - unsigned int _num_inputs; - unsigned int _axis; + std::vector> _concat_kernels; + unsigned int _num_inputs; + unsigned int _axis; }; } // namespace experimental } // namespace arm_compute diff --git a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h index 42a62dc0ab..984e8d68c0 100644 --- a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h +++ b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,16 +24,17 @@ #ifndef ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H #define ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H -#include "arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/ITransformWeights.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/Tensor.h" +#include namespace arm_compute { // Forward declarations class ITensor; +class NEConvertFullyConnectedWeightsKernel; /** Basic function to run @ref NEConvertFullyConnectedWeightsKernel. 
*/ class NEConvertFullyConnectedWeights : public IFunction @@ -41,6 +42,16 @@ class NEConvertFullyConnectedWeights : public IFunction public: /** Default constructor */ NEConvertFullyConnectedWeights(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvertFullyConnectedWeights(const NEConvertFullyConnectedWeights &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvertFullyConnectedWeights &operator=(const NEConvertFullyConnectedWeights &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvertFullyConnectedWeights(NEConvertFullyConnectedWeights &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvertFullyConnectedWeights &operator=(NEConvertFullyConnectedWeights &&) = delete; + /** Default destructor */ + ~NEConvertFullyConnectedWeights(); /** Initialize the function. * * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. @@ -64,7 +75,7 @@ public: void run() override; private: - NEConvertFullyConnectedWeightsKernel _kernel; + std::unique_ptr<NEConvertFullyConnectedWeightsKernel> _kernel; }; namespace weights_transformations diff --git a/arm_compute/runtime/NEON/functions/NEConvolution.h b/arm_compute/runtime/NEON/functions/NEConvolution.h index eb16a4582e..9415cf0835 100644 --- a/arm_compute/runtime/NEON/functions/NEConvolution.h +++ b/arm_compute/runtime/NEON/functions/NEConvolution.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_NECONVOLUTION_H #define ARM_COMPUTE_NECONVOLUTION_H -#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -39,6 +37,13 @@ namespace arm_compute { class ITensor; +class NEFillBorderKernel; +template +class NEConvolutionKernel; +template +class NESeparableConvolutionHorKernel; +template +class NESeparableConvolutionVertKernel; /** Basic function to execute convolution of size 3x3. This function calls the following NEON kernels: * @@ -49,6 +54,18 @@ class ITensor; class NEConvolution3x3 : public INESimpleFunction { public: + /** Constructor */ + NEConvolution3x3() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolution3x3(const NEConvolution3x3 &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolution3x3 &operator=(const NEConvolution3x3 &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolution3x3(NEConvolution3x3 &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolution3x3 &operator=(NEConvolution3x3 &&) = delete; + /** Default destructor */ + ~NEConvolution3x3(); /** Initialize the function's source, destination, conv and border_mode. * * @param[in,out] input Source tensor. Data type supported: U8. 
(Written to only for @p border_mode != UNDEFINED) @@ -74,6 +91,16 @@ class NEConvolutionSquare : public IFunction public: /** Default constructor */ NEConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionSquare(const NEConvolutionSquare &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionSquare &operator=(const NEConvolutionSquare &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionSquare(NEConvolutionSquare &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionSquare &operator=(NEConvolutionSquare &&) = delete; + /** Default destructor */ + ~NEConvolutionSquare(); /** Initialize the function's source, destination, conv and border_mode. * * @param[in,out] input Source tensor. Data type supported: U8. 
(Written to only for @p border_mode != UNDEFINED) @@ -89,13 +116,13 @@ public: void run() override; private: - MemoryGroup _memory_group; /**< Function memory group */ - Tensor _tmp; /**< temporary buffer for output of horizontal pass */ - bool _is_separable; /**< true if the convolution can be separated */ - NESeparableConvolutionHorKernel _kernel_hor; /**< kernel for horizontal pass of separated convolution */ - NESeparableConvolutionVertKernel _kernel_vert; /**< kernel for vertical pass of separated convolution */ - NEConvolutionKernel _kernel; /**< kernel for non-separated convolution **/ - NEFillBorderKernel _border_handler; /**< kernel for border handling */ + MemoryGroup _memory_group; /**< Function memory group */ + Tensor _tmp; /**< temporary buffer for output of horizontal pass */ + bool _is_separable; /**< true if the convolution can be separated */ + std::unique_ptr> _kernel_hor; /**< kernel for horizontal pass of separated convolution */ + std::unique_ptr> _kernel_vert; /**< kernel for vertical pass of separated convolution */ + std::unique_ptr> _kernel; /**< kernel for non-separated convolution **/ + std::unique_ptr _border_handler; /**< kernel for border handling */ }; /** Basic function to run 5x5 convolution. 
*/ @@ -115,6 +142,18 @@ using NEConvolution9x9 = NEConvolutionSquare<9>; class NEConvolutionRectangle : public INESimpleFunction { public: + /** Constructor */ + NEConvolutionRectangle() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionRectangle(const NEConvolutionRectangle &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionRectangle &operator=(const NEConvolutionRectangle &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionRectangle(NEConvolutionRectangle &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionRectangle &operator=(NEConvolutionRectangle &&) = delete; + /** Default destructor */ + ~NEConvolutionRectangle(); /** Initialize the function's source, destination, conv and border_mode. * * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h index e8b425b459..54dae57752 100644 --- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -75,7 +75,16 @@ class NEConvolutionLayer : public IFunction public: /** Constructor */ NEConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionLayer(const NEConvolutionLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionLayer &operator=(const NEConvolutionLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionLayer(NEConvolutionLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionLayer &operator=(NEConvolutionLayer &&) = delete; + /** Default destructor */ + ~NEConvolutionLayer() = default; /** Set the input and output tensors. * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], diff --git a/arm_compute/runtime/NEON/functions/NECopy.h b/arm_compute/runtime/NEON/functions/NECopy.h index df1a49863a..a58ac9e620 100644 --- a/arm_compute/runtime/NEON/functions/NECopy.h +++ b/arm_compute/runtime/NEON/functions/NECopy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,11 +30,24 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NECopyKernel */ class NECopy : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NECopy() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECopy(const NECopy &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECopy &operator=(const NECopy &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NECopy(NECopy &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NECopy &operator=(NECopy &&) = delete; + /** Default destructor */ + ~NECopy(); /** Initialise the function's source and destination. * * @param[in] input Source tensor. Data types supported: All diff --git a/arm_compute/runtime/NEON/functions/NECropResize.h b/arm_compute/runtime/NEON/functions/NECropResize.h index 361c236293..5c3733f8ee 100644 --- a/arm_compute/runtime/NEON/functions/NECropResize.h +++ b/arm_compute/runtime/NEON/functions/NECropResize.h @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEON_CROP_RESIZE_H #define ARM_COMPUTE_NEON_CROP_RESIZE_H -#include "arm_compute/core/NEON/kernels/NECropKernel.h" #include "arm_compute/runtime/NEON/functions/NEScale.h" #include @@ -33,6 +32,7 @@ namespace arm_compute { // Forward Declarations class ITensor; +class NECropKernel; /** Function to perform cropping and resizing */ class NECropResize : public IFunction @@ -49,7 +49,7 @@ public: /** Allow instances of this class to be moved */ NECropResize &operator=(NECropResize &&) = default; /** Default destructor */ - virtual ~NECropResize() = default; + ~NECropResize(); /** Configure kernel * diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h index 
89f3958417..c9817a63c1 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h @@ -32,6 +32,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /**Basic function to run @ref NEDepthConvertLayerKernel */ class NEDepthConvertLayer : public INESimpleFunctionNoBorder @@ -43,6 +44,8 @@ public: NEDepthConvertLayer(const NEDepthConvertLayer &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers)*/ const NEDepthConvertLayer &operator=(const NEDepthConvertLayer &) = delete; + /** Default destructor */ + ~NEDepthConvertLayer() = default; /** Initialize the function's source, destination * * Valid conversions Input -> Output : diff --git a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h index 22bbd6e716..51f7ff7770 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,7 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" @@ -34,11 +33,24 @@ namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NEDepthToSpaceLayerKernel. 
*/ class NEDepthToSpaceLayer : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEDepthToSpaceLayer() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthToSpaceLayer(const NEDepthToSpaceLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthToSpaceLayer &operator=(const NEDepthToSpaceLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEDepthToSpaceLayer(NEDepthToSpaceLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEDepthToSpaceLayer &operator=(NEDepthToSpaceLayer &&) = delete; + /** Default destructor */ + ~NEDepthToSpaceLayer() = default; /** Set the input and output tensors. * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h index c6b98ed435..dc70aec7ff 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h @@ -24,17 +24,16 @@ #ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H #define ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H -#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h" -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEPermute.h" #include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h" +#include namespace arm_compute { // Forward declarations class ITensor; +class NEDepthwiseConvolutionLayerNativeKernel; /** Function to execute a 
depthwise convolution. */ @@ -51,6 +50,8 @@ public: NEDepthwiseConvolutionLayer &operator=(const NEDepthwiseConvolutionLayer &) = delete; /** Default move assignment operator */ NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default; + /** Default destructor */ + ~NEDepthwiseConvolutionLayer(); /** Initialize the function's source, destination, weights and convolution information. * * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32 @@ -133,6 +134,8 @@ private: NEDepthwiseConvolutionLayerOptimizedInternal &operator=(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete; /** Default move assignment operator */ NEDepthwiseConvolutionLayerOptimizedInternal &operator=(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default; + /** Default destructor */ + ~NEDepthwiseConvolutionLayerOptimizedInternal() = default; /** Initialize the function's source, destination, kernels and border_size. * * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling). 
@@ -170,25 +173,23 @@ private: void prepare() override; private: - MemoryGroup _memory_group; - NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func; - NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel; - NEFillBorderKernel _border_handler; - NEPermute _permute_input; - NEPermute _permute_weights; - NEPermute _permute_output; - NEActivationLayer _activationlayer_function; - Tensor _accumulator; - Tensor _permuted_input; - Tensor _permuted_weights; - Tensor _permuted_output; - const ITensor *_original_weights; - bool _has_bias; - bool _is_quantized; - bool _is_nchw; - bool _permute; - bool _is_activationlayer_enabled; - bool _is_prepared; + MemoryGroup _memory_group; + NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func; + NEPermute _permute_input; + NEPermute _permute_weights; + NEPermute _permute_output; + NEActivationLayer _activationlayer_function; + Tensor _accumulator; + Tensor _permuted_input; + Tensor _permuted_weights; + Tensor _permuted_output; + const ITensor *_original_weights; + bool _has_bias; + bool _is_quantized; + bool _is_nchw; + bool _permute; + bool _is_activationlayer_enabled; + bool _is_prepared; }; /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernel: @@ -209,6 +210,8 @@ private: NEDepthwiseConvolutionLayerGeneric &operator=(const NEDepthwiseConvolutionLayerGeneric &) = delete; /** Default move assignment operator */ NEDepthwiseConvolutionLayerGeneric &operator=(NEDepthwiseConvolutionLayerGeneric &&) = default; + /** Default destructor */ + ~NEDepthwiseConvolutionLayerGeneric() = default; /** Initialize the function's source, destination, weights and convolution information. * * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling). 
@@ -248,18 +251,18 @@ private: void prepare() override; private: - NEDepthwiseConvolutionLayerNativeKernel _depthwise_conv_kernel; - NEPermute _permute_input; - NEPermute _permute_weights; - NEPermute _permute_output; - NEActivationLayer _activationlayer_function; - Tensor _permuted_input; - Tensor _permuted_weights; - Tensor _permuted_output; - bool _is_prepared; - bool _is_nchw; - bool _is_activationlayer_enabled; - const ITensor *_original_weights; + std::unique_ptr<NEDepthwiseConvolutionLayerNativeKernel> _depthwise_conv_kernel; + NEPermute _permute_input; + NEPermute _permute_weights; + NEPermute _permute_output; + NEActivationLayer _activationlayer_function; + Tensor _permuted_input; + Tensor _permuted_weights; + Tensor _permuted_output; + bool _is_prepared; + bool _is_nchw; + bool _is_activationlayer_enabled; + const ITensor *_original_weights; }; DepthwiseConvolutionFunction _depth_conv_func; diff --git a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h index 77295bc089..f52d709c74 100644 --- a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h @@ -32,6 +32,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NEDequantizationLayerKernel that dequantizes an input tensor */ class NEDequantizationLayer : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEDerivative.h b/arm_compute/runtime/NEON/functions/NEDerivative.h index 8eb21425ac..7d852d0ffe 100644 --- a/arm_compute/runtime/NEON/functions/NEDerivative.h +++ b/arm_compute/runtime/NEON/functions/NEDerivative.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,16 +24,16 @@ #ifndef ARM_COMPUTE_NEDERIVATIVE_H #define ARM_COMPUTE_NEDERIVATIVE_H -#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" -#include +#include namespace arm_compute { class ITensor; +class NEDerivativeKernel; +class NEFillBorderKernel; /** Basic function to execute first order derivative operator. This function calls the following NEON kernels: * @@ -46,6 +46,16 @@ class NEDerivative : public IFunction public: /** Default constructor */ NEDerivative(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDerivative(const NEDerivative &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDerivative &operator=(const NEDerivative &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEDerivative(NEDerivative &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEDerivative &operator=(NEDerivative &&) = delete; + /** Default destructor */ + ~NEDerivative(); /** Initialise the function's source, destinations and border mode. * * @note At least one of output_x or output_y must be not NULL. 
@@ -63,8 +73,8 @@ public: void run() override; private: - NEDerivativeKernel _kernel; /**< Derivative kernel */ - NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */ + std::unique_ptr<NEDerivativeKernel> _kernel; /**< Derivative kernel */ + std::unique_ptr<NEFillBorderKernel> _border_handler; /**< Kernel to handle tensor borders */ }; } #endif /* ARM_COMPUTE_NEDERIVATIVE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h index e0431b2b31..d5c1f0ab6f 100644 --- a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h @@ -53,6 +53,8 @@ public: NEDetectionPostProcessLayer(const NEDetectionPostProcessLayer &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEDetectionPostProcessLayer &operator=(const NEDetectionPostProcessLayer &) = delete; + /** Default destructor */ + ~NEDetectionPostProcessLayer() = default; /** Configure the detection output layer NE function * * @param[in] input_box_encoding The bounding box input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32. diff --git a/arm_compute/runtime/NEON/functions/NEDilate.h b/arm_compute/runtime/NEON/functions/NEDilate.h index 6dae2c7029..33be5c8fba 100644 --- a/arm_compute/runtime/NEON/functions/NEDilate.h +++ b/arm_compute/runtime/NEON/functions/NEDilate.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h index d1c811c363..5b6ed55be2 100644 --- a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h @@ -24,9 +24,6 @@ #ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H #define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -38,6 +35,10 @@ namespace arm_compute { +class NEDirectConvolutionLayerOutputStageKernel; +class NEDirectConvolutionLayerKernel; +class NEFillBorderKernel; + /** Function to run the direct convolution. 
* * This function calls the following NEON kernels: @@ -51,6 +52,16 @@ class NEDirectConvolutionLayer : public IFunction public: /** Constructor */ NEDirectConvolutionLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayer(const NEDirectConvolutionLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayer &operator=(const NEDirectConvolutionLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEDirectConvolutionLayer(NEDirectConvolutionLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEDirectConvolutionLayer &operator=(NEDirectConvolutionLayer &&) = delete; + /** Default destructor */ + ~NEDirectConvolutionLayer(); /** Set the input, weights, biases and output tensors. 
* * @note: DirectConvolution only works in the following configurations: @@ -97,16 +108,16 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel; - NEDirectConvolutionLayerKernel _conv_kernel; - NEFillBorderKernel _input_border_handler; - NEActivationLayer _activationlayer_function; - Tensor _accumulator; - bool _has_bias; - bool _is_activationlayer_enabled; - unsigned int _dim_split; - bool _is_padding_required; + MemoryGroup _memory_group; + std::unique_ptr<NEDirectConvolutionLayerOutputStageKernel> _output_stage_kernel; + std::unique_ptr<NEDirectConvolutionLayerKernel> _conv_kernel; + std::unique_ptr<NEFillBorderKernel> _input_border_handler; + NEActivationLayer _activationlayer_function; + Tensor _accumulator; + bool _has_bias; + bool _is_activationlayer_enabled; + unsigned int _dim_split; + bool _is_padding_required; }; } #endif /* ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h index 8b3301889a..46a7316705 100644 --- a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h +++ b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h @@ -24,11 +24,13 @@ #ifndef ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H #define ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to perform inverse square root on an input tensor. */ class NERsqrtLayer : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h b/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h index 5c0c323591..36c4902c04 100644 --- a/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h +++ b/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,9 +24,6 @@ #ifndef ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H #define ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H -#include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h" -#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h" -#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h" #include "arm_compute/runtime/Distribution1D.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/Lut.h" @@ -36,6 +33,9 @@ namespace arm_compute { class ITensor; +class NEHistogramKernel; +class NECumulativeDistributionKernel; +class NETableLookupKernel; using IImage = ITensor; /** Basic function to execute histogram equalization. This function calls the following NEON kernels: @@ -50,6 +50,16 @@ class NEEqualizeHistogram : public IFunction public: /** Default Constructor. */ NEEqualizeHistogram(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEqualizeHistogram(const NEEqualizeHistogram &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEqualizeHistogram &operator=(const NEEqualizeHistogram &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEEqualizeHistogram(NEEqualizeHistogram &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEEqualizeHistogram &operator=(NEEqualizeHistogram &&) = delete; + /** Default destructor */ + ~NEEqualizeHistogram(); /** Initialise the kernel's inputs. * * @note Currently the width of the input image must be a multiple of 16. @@ -63,15 +73,15 @@ public: void run() override; private: - NEHistogramKernel _histogram_kernel; /**< Kernel that calculates the histogram of input. 
*/ - NECumulativeDistributionKernel _cd_histogram_kernel; /**< Kernel that calculates the cumulative distribution + std::unique_ptr<NEHistogramKernel> _histogram_kernel; /**< Kernel that calculates the histogram of input. */ + std::unique_ptr<NECumulativeDistributionKernel> _cd_histogram_kernel; /**< Kernel that calculates the cumulative distribution and creates the relevant LookupTable. */ - NETableLookupKernel _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */ - Distribution1D _hist; /**< Distribution that holds the histogram of the input image. */ - Distribution1D _cum_dist; /**< Distribution that holds the cummulative distribution of the input histogram. */ - Lut _cd_lut; /**< Holds the equalization lookuptable. */ - static constexpr uint32_t nr_bins{ 256 }; /**< Histogram bins of the internal histograms. */ - static constexpr uint32_t max_range{ nr_bins - 1 }; /**< Histogram range of the internal histograms. */ + std::unique_ptr<NETableLookupKernel> _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */ + Distribution1D _hist; /**< Distribution that holds the histogram of the input image. */ + Distribution1D _cum_dist; /**< Distribution that holds the cumulative distribution of the input histogram. */ + Lut _cd_lut; /**< Holds the equalization lookuptable. */ + static constexpr uint32_t nr_bins{ 256 }; /**< Histogram bins of the internal histograms. */ + static constexpr uint32_t max_range{ nr_bins - 1 }; /**< Histogram range of the internal histograms. */ }; } #endif /*ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H */ diff --git a/arm_compute/runtime/NEON/functions/NEErode.h b/arm_compute/runtime/NEON/functions/NEErode.h index 3e84c2b758..e2d76c1e1d 100644 --- a/arm_compute/runtime/NEON/functions/NEErode.h +++ b/arm_compute/runtime/NEON/functions/NEErode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEFFT1D.h b/arm_compute/runtime/NEON/functions/NEFFT1D.h index 312b46b10f..4b6cc3fd18 100644 --- a/arm_compute/runtime/NEON/functions/NEFFT1D.h +++ b/arm_compute/runtime/NEON/functions/NEFFT1D.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,19 +24,21 @@ #ifndef ARM_COMPUTE_NEFFT1D_H #define ARM_COMPUTE_NEFFT1D_H -#include "arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h" -#include "arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEFFTScaleKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/FunctionDescriptors.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/Tensor.h" +#include + namespace arm_compute { // Forward declaration class ITensor; +class NEFFTDigitReverseKernel; +class NEFFTRadixStageKernel; +class NEFFTScaleKernel; /** Basic function to execute one dimensional FFT. This function calls the following NEON kernels: * @@ -49,6 +51,16 @@ class NEFFT1D : public IFunction public: /** Default Constructor */ NEFFT1D(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFT1D(const NEFFT1D &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFT1D &operator=(const NEFFT1D &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFFT1D(NEFFT1D &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFFT1D &operator=(NEFFT1D &&) = delete; + /** Default destructor */ + ~NEFFT1D(); /** Initialise the function's source and destinations. * * @param[in] input Source tensor. Data types supported: F32. 
Number of channels supported: 1 (real tensor) or 2 (complex tensor). @@ -71,15 +83,15 @@ public: void run() override; protected: - MemoryGroup _memory_group; - NEFFTDigitReverseKernel _digit_reverse_kernel; - std::vector<NEFFTRadixStageKernel> _fft_kernels; - NEFFTScaleKernel _scale_kernel; - Tensor _digit_reversed_input; - Tensor _digit_reverse_indices; - unsigned int _num_ffts; - unsigned int _axis; - bool _run_scale; + MemoryGroup _memory_group; + std::unique_ptr<NEFFTDigitReverseKernel> _digit_reverse_kernel; + std::vector<std::unique_ptr<NEFFTRadixStageKernel>> _fft_kernels; + std::unique_ptr<NEFFTScaleKernel> _scale_kernel; + Tensor _digit_reversed_input; + Tensor _digit_reverse_indices; + unsigned int _num_ffts; + unsigned int _axis; + bool _run_scale; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEFFT1D_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFFT2D.h b/arm_compute/runtime/NEON/functions/NEFFT2D.h index efcce2e9a4..18e72c1a2f 100644 --- a/arm_compute/runtime/NEON/functions/NEFFT2D.h +++ b/arm_compute/runtime/NEON/functions/NEFFT2D.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,6 +46,16 @@ class NEFFT2D : public IFunction public: /** Default Constructor */ NEFFT2D(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFT2D(const NEFFT2D &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFT2D &operator=(const NEFFT2D &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFFT2D(NEFFT2D &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFFT2D &operator=(NEFFT2D &&) = delete; + /** Default destructor */ + ~NEFFT2D(); /** Initialise the function's source and destinations * * @param[in] input Source tensor. Data types supported: F32. 
diff --git a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h index dd57900f2a..b3e98fc2d6 100644 --- a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -69,6 +69,8 @@ public: NEFFTConvolutionLayer &operator=(const NEFFTConvolutionLayer &) = delete; /** Default move assignment operator */ NEFFTConvolutionLayer &operator=(NEFFTConvolutionLayer &&) = default; + /** Default destructor */ + ~NEFFTConvolutionLayer(); /** Set the input and output tensors. * * @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout diff --git a/arm_compute/runtime/NEON/functions/NEFastCorners.h b/arm_compute/runtime/NEON/functions/NEFastCorners.h index cc69e77ebb..025038bb28 100644 --- a/arm_compute/runtime/NEON/functions/NEFastCorners.h +++ b/arm_compute/runtime/NEON/functions/NEFastCorners.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,10 +24,6 @@ #ifndef ARM_COMPUTE_NEFASTCORNERS_H #define ARM_COMPUTE_NEFASTCORNERS_H -#include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/Array.h" #include "arm_compute/runtime/IFunction.h" @@ -41,6 +37,10 @@ namespace arm_compute { class ITensor; +class NENonMaximaSuppression3x3Kernel; +class NEFastCornersKernel; +class NEFillBorderKernel; +class NEFillArrayKernel; using IImage = ITensor; /** Basic function to execute fast corners. 
This function call the following NEON kernels: @@ -55,6 +55,16 @@ class NEFastCorners : public IFunction public: /** Constructor */ NEFastCorners(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFastCorners(const NEFastCorners &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFastCorners &operator=(const NEFastCorners &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFastCorners(NEFastCorners &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFastCorners &operator=(NEFastCorners &&) = delete; + /** Default destructor */ + ~NEFastCorners(); /** Initialize the function's source, destination, conv and border_mode. * * @param[in, out] input Source image. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) @@ -71,14 +81,14 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEFastCornersKernel _fast_corners_kernel; - NEFillBorderKernel _border_handler; - NENonMaximaSuppression3x3Kernel _nonmax_kernel; - NEFillArrayKernel _fill_kernel; - Image _output; - Image _suppressed; - bool _non_max; + MemoryGroup _memory_group; + std::unique_ptr<NEFastCornersKernel> _fast_corners_kernel; + std::unique_ptr<NEFillBorderKernel> _border_handler; + std::unique_ptr<NENonMaximaSuppression3x3Kernel> _nonmax_kernel; + std::unique_ptr<NEFillArrayKernel> _fill_kernel; + Image _output; + Image _suppressed; + bool _non_max; }; } #endif /*ARM_COMPUTE_NEFASTCORNERS_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFill.h b/arm_compute/runtime/NEON/functions/NEFill.h index 1c3c546c68..14d690f419 100644 --- a/arm_compute/runtime/NEON/functions/NEFill.h +++ b/arm_compute/runtime/NEON/functions/NEFill.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEFILL_H #define ARM_COMPUTE_NEFILL_H -#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" diff --git a/arm_compute/runtime/NEON/functions/NEFillBorder.h b/arm_compute/runtime/NEON/functions/NEFillBorder.h index 3ac23be731..e9a08ef7ec 100644 --- a/arm_compute/runtime/NEON/functions/NEFillBorder.h +++ b/arm_compute/runtime/NEON/functions/NEFillBorder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,15 +24,16 @@ #ifndef ARM_COMPUTE_NEFILLBORDER_H #define ARM_COMPUTE_NEFILLBORDER_H -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" +#include namespace arm_compute { // Forward declaration class ITensor; +class NEFillBorderKernel; /** Basic function to run @ref NEFillBorderKernel */ class NEFillBorder : public IFunction @@ -53,7 +54,7 @@ public: void run() override; private: - NEFillBorderKernel _border_handler; /**< Kernel to handle image borders */ + std::unique_ptr _border_handler; /**< Kernel to handle image borders */ }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEFILLBORDER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h index 73da254ef5..9f0d5226de 100644 --- a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to execute flatten layer kernel. 
*/ class NEFlattenLayer : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEFloor.h b/arm_compute/runtime/NEON/functions/NEFloor.h index 12f0ee20ba..7f4248eadb 100644 --- a/arm_compute/runtime/NEON/functions/NEFloor.h +++ b/arm_compute/runtime/NEON/functions/NEFloor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -31,6 +31,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEFloorKernel */ class NEFloor : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h index 21df3c4aef..3ab3d81262 100644 --- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h @@ -26,25 +26,36 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NETransposeKernel.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h" +#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" #include "arm_compute/runtime/Tensor.h" namespace arm_compute { +class NEFlattenLayerKernel; + /** Basic function to reshape the weights of Fully Connected layer with NEON. This function calls the following kernels: - * - * -# @ref NETransposeKernel * * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. 
*/ class NEFullyConnectedLayerReshapeWeights : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEFullyConnectedLayerReshapeWeights() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFullyConnectedLayerReshapeWeights(const NEFullyConnectedLayerReshapeWeights &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFullyConnectedLayerReshapeWeights &operator=(const NEFullyConnectedLayerReshapeWeights &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFullyConnectedLayerReshapeWeights(NEFullyConnectedLayerReshapeWeights &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFullyConnectedLayerReshapeWeights &operator=(NEFullyConnectedLayerReshapeWeights &&) = delete; + /** Default destructor */ + ~NEFullyConnectedLayerReshapeWeights() = default; /** Set the input and output tensors. * * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. @@ -122,6 +133,8 @@ public: NEFullyConnectedLayer &operator=(const NEFullyConnectedLayer &) = delete; /** Default move assignment operator */ NEFullyConnectedLayer &operator=(NEFullyConnectedLayer &&) = default; + /** Default destructor */ + ~NEFullyConnectedLayer(); /** Set the input and output tensors. * * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. 
@@ -168,7 +181,7 @@ private: MemoryGroup _memory_group; IWeightsManager *_weights_manager; - NEFlattenLayerKernel _flatten_kernel; + std::unique_ptr<NEFlattenLayerKernel> _flatten_kernel; NEConvertFullyConnectedWeights _convert_weights; weights_transformations::NEConvertFullyConnectedWeightsManaged _convert_weights_managed; NEFullyConnectedLayerReshapeWeights _reshape_weights_function; diff --git a/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h b/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h index 6b561352a6..5dc804e240 100644 --- a/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h +++ b/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,6 @@ #define ARM_COMPUTE_NEFUSEBATCHNORMALIZATION_H #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" @@ -33,6 +32,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class NEFuseBatchNormalizationKernel; /** Basic function to fuse the batch normalization node to a preceding convolution node */ class NEFuseBatchNormalization : public IFunction @@ -49,7 +49,7 @@ public: /** Allow instances of this class to be moved */ NEFuseBatchNormalization &operator=(NEFuseBatchNormalization &&) = default; /** Default destructor */ - ~NEFuseBatchNormalization() = default; + ~NEFuseBatchNormalization(); /** Set the input and output tensors. * * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32.
Data layout supported: NCHW, NHWC @@ -94,7 +94,7 @@ public: void run() override; private: - NEFuseBatchNormalizationKernel _fuse_bn_kernel; + std::unique_ptr _fuse_bn_kernel; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEFUSEBATCHNORMALIZATION_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h index 8d65fb5303..645ab56417 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMM.h +++ b/arm_compute/runtime/NEON/functions/NEGEMM.h @@ -24,11 +24,6 @@ #ifndef ARM_COMPUTE_NEGEMM_H #define ARM_COMPUTE_NEGEMM_H -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/IWeightsManager.h" @@ -38,8 +33,14 @@ #include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/Tensor.h" +#include + namespace arm_compute { +class NEGEMMInterleave4x4Kernel; +class NEGEMMMatrixAdditionKernel; +class NEGEMMMatrixMultiplyKernel; +class NEGEMMTranspose1xWKernel; /** Basic function to execute GEMM on NEON. This function calls the following NEON kernels: * * If optimized assembly is available: @@ -69,6 +70,8 @@ public: NEGEMM &operator=(const NEGEMM &) = delete; /** Default move assignment operator */ NEGEMM &operator=(NEGEMM &&) = default; + /** Default destructor */ + ~NEGEMM(); /** Initialise the kernel's inputs, output * * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. 
@@ -104,16 +107,16 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - NEGEMMInterleave4x4Kernel _interleave_kernel; - NEGEMMTranspose1xWKernel _transpose_kernel; - NEGEMMMatrixMultiplyKernel _mm_kernel; - NEGEMMAssemblyDispatch _asm_glue; - NEGEMMMatrixAdditionKernel _ma_kernel; - NEActivationLayer _alpha_scale_func; - NEArithmeticAddition _add_bias; - NEActivationLayer _activation_func; + MemoryGroup _memory_group; + IWeightsManager *_weights_manager; + std::unique_ptr<NEGEMMInterleave4x4Kernel> _interleave_kernel; + std::unique_ptr<NEGEMMTranspose1xWKernel> _transpose_kernel; + std::unique_ptr<NEGEMMMatrixMultiplyKernel> _mm_kernel; + NEGEMMAssemblyDispatch _asm_glue; + std::unique_ptr<NEGEMMMatrixAdditionKernel> _ma_kernel; + NEActivationLayer _alpha_scale_func; + NEArithmeticAddition _add_bias; + NEActivationLayer _activation_func; Tensor _tmp_a; Tensor _tmp_b; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h index b3f5c51010..6bcf56fb0b 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h @@ -26,10 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" -#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" -#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IWeightsManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -44,6 +40,9 @@ namespace arm_compute { class ITensor; +class NECol2ImKernel; +class NEIm2ColKernel; +class NEWeightsReshapeKernel; /** Function to reshape the weights.
This function calls the following kernel: * -# @ref NEWeightsReshapeKernel @@ -61,6 +60,8 @@ public: NEConvolutionLayerReshapeWeights &operator=(const NEConvolutionLayerReshapeWeights &) = delete; /** Default move assignment operator */ NEConvolutionLayerReshapeWeights &operator=(NEConvolutionLayerReshapeWeights &&) = default; + /** Default destructor */ + ~NEConvolutionLayerReshapeWeights(); /** Set the input and output tensors. * * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. @@ -88,7 +89,7 @@ public: void run() override; private: - NEWeightsReshapeKernel _weights_reshape_kernel; + std::unique_ptr<NEWeightsReshapeKernel> _weights_reshape_kernel; }; namespace weights_transformations @@ -97,6 +98,18 @@ namespace weights_transformations class NEConvolutionLayerReshapeWeightsTransform : public ITransformWeights { public: + /** Constructor */ + NEConvolutionLayerReshapeWeightsTransform() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionLayerReshapeWeightsTransform(const NEConvolutionLayerReshapeWeightsTransform &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionLayerReshapeWeightsTransform &operator=(const NEConvolutionLayerReshapeWeightsTransform &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionLayerReshapeWeightsTransform(NEConvolutionLayerReshapeWeightsTransform &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionLayerReshapeWeightsTransform &operator=(NEConvolutionLayerReshapeWeightsTransform &&) = delete; + /** Default destructor */ + ~NEConvolutionLayerReshapeWeightsTransform() = default; void configure(const ITensor *input, const ITensor *biases) { _bias_bit = (biases != nullptr) ?
1 : 0; @@ -160,6 +173,8 @@ public: NEGEMMConvolutionLayer &operator=(const NEGEMMConvolutionLayer &) = delete; /** Default move assignment operator */ NEGEMMConvolutionLayer &operator=(NEGEMMConvolutionLayer &&) = default; + /** Default destructor */ + ~NEGEMMConvolutionLayer(); /** Set the input and output tensors. * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -253,10 +268,10 @@ private: IWeightsManager *_weights_manager; NEConvolutionLayerReshapeWeights _reshape_weights; weights_transformations::NEConvolutionLayerReshapeWeightsTransform _reshape_weights_managed; - NEIm2ColKernel _im2col_kernel; + std::unique_ptr<NEIm2ColKernel> _im2col_kernel; NEGEMM _mm_gemm; NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp; - NECol2ImKernel _col2im_kernel; + std::unique_ptr<NECol2ImKernel> _col2im_kernel; NEReshapeLayer _reshape_layer; const ITensor *_original_weights; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h index 58cb383c67..7195c71063 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h index 9813b34661..961b1901e7 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited.
* * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H #define ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -37,6 +36,9 @@ namespace arm_compute { // Forward declarations class ITensor; +class NEGEMMInterleave4x4Kernel; +class NEGEMMTranspose1xWKernel; +class NEGEMMLowpMatrixMultiplyKernel; /** Basic function to execute matrix multiply assembly kernels. */ class NEGEMMLowpAssemblyMatrixMultiplyCore : public IFunction @@ -44,6 +46,9 @@ class NEGEMMLowpAssemblyMatrixMultiplyCore : public IFunction public: /** Constructor */ NEGEMMLowpAssemblyMatrixMultiplyCore(std::shared_ptr memory_manager = nullptr); + /** Destructor */ + ~NEGEMMLowpAssemblyMatrixMultiplyCore(); + /** Initialise the kernel's inputs, output * * @param[in] a First input tensor (Matrix A). Data type supported: U8, S8. 
@@ -57,13 +62,13 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEGEMMAssemblyDispatch _asm_glue; - std::unique_ptr _mm_kernel; - std::unique_ptr _mtx_a_reshape_kernel; - std::unique_ptr _mtx_b_reshape_kernel; - Tensor _tmp_a; - Tensor _tmp_b; + MemoryGroup _memory_group; + NEGEMMAssemblyDispatch _asm_glue; + std::unique_ptr _mm_kernel; + std::unique_ptr _mtx_a_reshape_kernel; + std::unique_ptr _mtx_b_reshape_kernel; + Tensor _tmp_a; + Tensor _tmp_b; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h index 01720f05fa..cb1d6bd782 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h @@ -25,15 +25,6 @@ #define ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H #include "NEActivationLayer.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" -#include "arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -45,6 +36,15 @@ namespace arm_compute { class ITensor; +class NEConvertQuantizedSignednessKernel; +class NEConvertQuantizedSignednessKernel; +class NEGEMMInterleave4x4Kernel; +class 
NEGEMMLowpMatrixMultiplyKernel; +class NEGEMMLowpOffsetContributionKernel; +class NEGEMMLowpOffsetContributionOutputStageKernel; +class NEGEMMLowpMatrixAReductionKernel; +class NEGEMMLowpMatrixBReductionKernel; +class NEGEMMTranspose1xWKernel; /** Basic function to execute GEMMLowpMatrixMultiplyCore on NEON. This function calls the following NEON kernels if the DOT product instruction is not available: * @@ -72,6 +72,8 @@ public: NEGEMMLowpMatrixMultiplyCore &operator=(const NEGEMMLowpMatrixMultiplyCore &) = delete; /** Default move assignment operator */ NEGEMMLowpMatrixMultiplyCore &operator=(NEGEMMLowpMatrixMultiplyCore &&) = default; + /** Default destructor */ + ~NEGEMMLowpMatrixMultiplyCore(); /** Initialise the kernel's inputs, output * * @note GEMM_LOWP: low precision GEMM kernel @@ -111,19 +113,19 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - NEGEMMAssemblyDispatch _asm_glue; - NEGEMMLowpMatrixMultiplyKernel _mm_kernel; - NEGEMMInterleave4x4Kernel _mtx_a_reshape_kernel; - NEGEMMTranspose1xWKernel _mtx_b_reshape_kernel; - NEGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel; - NEGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel; - NEGEMMLowpOffsetContributionKernel _offset_contribution_kernel; - NEGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel; - NEActivationLayer _activation_func; - NEConvertQuantizedSignednessKernel _convert_to_signed_asymm; - NEConvertQuantizedSignednessKernel _convert_from_signed_asymm; + MemoryGroup _memory_group; + IWeightsManager *_weights_manager; + NEGEMMAssemblyDispatch _asm_glue; + std::unique_ptr<NEGEMMLowpMatrixMultiplyKernel> _mm_kernel; + std::unique_ptr<NEGEMMInterleave4x4Kernel> _mtx_a_reshape_kernel; + std::unique_ptr<NEGEMMTranspose1xWKernel> _mtx_b_reshape_kernel; + std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _mtx_a_reduction_kernel; + std::unique_ptr<NEGEMMLowpMatrixBReductionKernel> _mtx_b_reduction_kernel; + std::unique_ptr<NEGEMMLowpOffsetContributionKernel> _offset_contribution_kernel; + std::unique_ptr<NEGEMMLowpOffsetContributionOutputStageKernel> _offset_contribution_output_stage_kernel; + NEActivationLayer _activation_func; +
std::unique_ptr<NEConvertQuantizedSignednessKernel> _convert_to_signed_asymm; + std::unique_ptr<NEConvertQuantizedSignednessKernel> _convert_from_signed_asymm; Tensor _vector_sum_col; Tensor _vector_sum_row; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h index f29d5d464b..6977d27cb6 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H #define ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" /** This file contains all available output stages for GEMMLowp on NEON. @@ -37,6 +38,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to execute NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on NEON. * @@ -69,6 +71,18 @@ class ITensor; class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &operator=(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &operator=(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
&&) = delete; + /** Default destructor */ + ~NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint(); /** Initialise the kernel's inputs, output * * @param[in] input Input tensor. Data type supported: S32 @@ -129,6 +143,18 @@ public: class NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &operator=(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &operator=(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &&) = delete; + /** Default destructor */ + ~NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint(); /** Initialise the kernel's inputs, output * * @param[in] input Input tensor. 
Data type supported: S32 @@ -189,6 +215,18 @@ public: class NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &operator=(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &operator=(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &&) = delete; + /** Default destructor */ + ~NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint(); /** Initialise the kernel's inputs, output * * @param[in] input Input tensor. 
Data type supported: S32 @@ -230,6 +268,18 @@ public: class NEGEMMLowpOutputStage : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEGEMMLowpOutputStage() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpOutputStage(const NEGEMMLowpOutputStage &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpOutputStage &operator=(const NEGEMMLowpOutputStage &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpOutputStage(NEGEMMLowpOutputStage &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpOutputStage &operator=(NEGEMMLowpOutputStage &&) = delete; + /** Default destructor */ + ~NEGEMMLowpOutputStage(); /** Initialise the kernel's inputs, output * * @param[in] input Input tensor. Data type supported: S32 diff --git a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h index 983c95d732..723a638d76 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,12 +24,14 @@ #ifndef ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H #define ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to execute NEGEMMTranspose1xWKernel. 
This function calls the following NEON kernels: * @@ -39,6 +41,18 @@ class ITensor; class NEGEMMTranspose1xW : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEGEMMTranspose1xW() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMTranspose1xW(const NEGEMMTranspose1xW &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMTranspose1xW &operator=(const NEGEMMTranspose1xW &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMTranspose1xW(NEGEMMTranspose1xW &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMTranspose1xW &operator=(NEGEMMTranspose1xW &&) = delete; + /** Default destructor */ + ~NEGEMMTranspose1xW() = default; /** Initialise the kernel's inputs, output * * @param[in] input First input tensor. Data type supported: All diff --git a/arm_compute/runtime/NEON/functions/NEGather.h b/arm_compute/runtime/NEON/functions/NEGather.h index b872c44443..a5e0461227 100644 --- a/arm_compute/runtime/NEON/functions/NEGather.h +++ b/arm_compute/runtime/NEON/functions/NEGather.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,6 +32,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NEGatherKernel */ class NEGather : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEGaussian3x3.h b/arm_compute/runtime/NEON/functions/NEGaussian3x3.h index 54fe91b975..db533858ee 100644 --- a/arm_compute/runtime/NEON/functions/NEGaussian3x3.h +++ b/arm_compute/runtime/NEON/functions/NEGaussian3x3.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h b/arm_compute/runtime/NEON/functions/NEGaussian5x5.h index 2e042e2307..3d933bbd3d 100644 --- a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h +++ b/arm_compute/runtime/NEON/functions/NEGaussian5x5.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_NEGAUSSIAN5x5_H #define ARM_COMPUTE_NEGAUSSIAN5x5_H -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -38,6 +36,9 @@ namespace arm_compute { class ITensor; +class NEGaussian5x5HorKernel; +class NEGaussian5x5VertKernel; +class NEFillBorderKernel; /** Basic function to execute gaussian filter 5x5. This function calls the following NEON kernels: * @@ -52,6 +53,16 @@ public: /** Default constructor */ NEGaussian5x5(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussian5x5(const NEGaussian5x5 &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussian5x5 &operator=(const NEGaussian5x5 &) = delete; + /** Allow instances of this class to be moved */ + NEGaussian5x5(NEGaussian5x5 &&) = default; + /** Allow instances of this class to be moved */ + NEGaussian5x5 &operator=(NEGaussian5x5 &&) = default; + /** Default destructor */ + ~NEGaussian5x5(); /** Initialise the function's input, output and border mode. * * @param[in, out] input Source tensor. Data type supported: U8. 
(Written to only for @p border_mode != UNDEFINED) @@ -65,11 +76,11 @@ public: void run() override; protected: - MemoryGroup _memory_group; /**< Function memory group */ - NEGaussian5x5HorKernel _kernel_hor; /**< kernel for horizontal pass */ - NEGaussian5x5VertKernel _kernel_vert; /**< kernel for vertical pass */ - Tensor _tmp; /**< temporary buffer for output of horizontal pass */ - NEFillBorderKernel _border_handler; /**< kernel to handle tensor borders */ + MemoryGroup _memory_group; /**< Function memory group */ + std::unique_ptr<NEGaussian5x5HorKernel> _kernel_hor; /**< kernel for horizontal pass */ + std::unique_ptr<NEGaussian5x5VertKernel> _kernel_vert; /**< kernel for vertical pass */ + Tensor _tmp; /**< temporary buffer for output of horizontal pass */ + std::unique_ptr<NEFillBorderKernel> _border_handler; /**< kernel to handle tensor borders */ }; } #endif /*ARM_COMPUTE_NEGAUSSIAN5x5_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h b/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h index d82f763f95..c82de0f4c2 100644 --- a/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h +++ b/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited.
* * SPDX-License-Identifier: MIT * @@ -25,7 +25,6 @@ #define ARM_COMPUTE_NEGAUSSIANPYRAMID_H #include "arm_compute/core/IPyramid.h" -#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" @@ -39,6 +38,9 @@ namespace arm_compute { class ITensor; +class NEGaussianPyramidHorKernel; +class NEGaussianPyramidVertKernel; +class NEFillBorderKernel; /** Common interface for all Gaussian pyramid functions */ class NEGaussianPyramid : public IFunction @@ -85,16 +87,26 @@ class NEGaussianPyramidHalf : public NEGaussianPyramid public: /** Constructor */ NEGaussianPyramidHalf(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramidHalf(const NEGaussianPyramidHalf &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramidHalf &operator=(const NEGaussianPyramidHalf &) = delete; + /** Allow instances of this class to be moved */ + NEGaussianPyramidHalf(NEGaussianPyramidHalf &&) = default; + /** Allow instances of this class to be moved */ + NEGaussianPyramidHalf &operator=(NEGaussianPyramidHalf &&) = default; + /** Default destructor */ + ~NEGaussianPyramidHalf(); // Inherited methods overridden: void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; void run() override; private: - std::vector _horizontal_border_handler; - std::vector _vertical_border_handler; - std::vector _horizontal_reduction; - std::vector _vertical_reduction; + std::vector> _horizontal_border_handler; + std::vector> _vertical_border_handler; + std::vector> _horizontal_reduction; + std::vector> _vertical_reduction; }; /** Basic function to execute gaussian pyramid with ORB scale factor. 
This function calls the following NEON kernels and functions: @@ -109,6 +121,16 @@ class NEGaussianPyramidOrb : public NEGaussianPyramid public: /** Constructor */ NEGaussianPyramidOrb(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramidOrb(const NEGaussianPyramidOrb &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramidOrb &operator=(const NEGaussianPyramidOrb &) = delete; + /** Allow instances of this class to be moved */ + NEGaussianPyramidOrb(NEGaussianPyramidOrb &&) = default; + /** Allow instances of this class to be moved */ + NEGaussianPyramidOrb &operator=(NEGaussianPyramidOrb &&) = default; + /** Default destructor */ + ~NEGaussianPyramidOrb(); // Inherited methods overridden: void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; diff --git a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h index f937832c0e..613f0d1c47 100644 --- a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h @@ -24,17 +24,17 @@ #ifndef ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H #define ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H -#include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h" -#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEPermuteKernel.h" -#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CPP/CPPScheduler.h" #include "arm_compute/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.h" #include "arm_compute/runtime/IFunction.h" 
#include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h" +#include "arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h" +#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEPadLayer.h" +#include "arm_compute/runtime/NEON/functions/NEPermute.h" +#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h" #include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" #include "arm_compute/runtime/Tensor.h" @@ -67,6 +67,8 @@ public: NEGenerateProposalsLayer(const NEGenerateProposalsLayer &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEGenerateProposalsLayer &operator=(const NEGenerateProposalsLayer &) = delete; + /** Default destructor */ + ~NEGenerateProposalsLayer(); /** Set the input and output tensors. * @@ -112,16 +114,16 @@ private: MemoryGroup _memory_group; // Neon kernels - NEPermuteKernel _permute_deltas_kernel; - NEReshapeLayer _flatten_deltas; - NEPermuteKernel _permute_scores_kernel; - NEReshapeLayer _flatten_scores; - NEComputeAllAnchorsKernel _compute_anchors_kernel; - NEBoundingBoxTransformKernel _bounding_box_kernel; - NEPadLayerKernel _pad_kernel; - NEDequantizationLayerKernel _dequantize_anchors; - NEDequantizationLayerKernel _dequantize_deltas; - NEQuantizationLayerKernel _quantize_all_proposals; + NEPermute _permute_deltas; + NEReshapeLayer _flatten_deltas; + NEPermute _permute_scores; + NEReshapeLayer _flatten_scores; + NEComputeAllAnchors _compute_anchors; + NEBoundingBoxTransform _bounding_box; + NEPadLayer _pad; + NEDequantizationLayer _dequantize_anchors; + NEDequantizationLayer _dequantize_deltas; + NEQuantizationLayer _quantize_all_proposals; // CPP functions CPPBoxWithNonMaximaSuppressionLimit _cpp_nms; diff --git a/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h b/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h index 
9b6fc4737b..c900040982 100644 --- a/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h +++ b/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEHOGDESCRIPTOR_H #define ARM_COMPUTE_NEHOGDESCRIPTOR_H -#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -36,6 +35,9 @@ namespace arm_compute { class IHOG; +class NEHOGOrientationBinningKernel; +class NEHOGBlockNormalizationKernel; + /** Basic function to calculate HOG descriptor. This function calls the following NEON kernels: * * -# @ref NEHOGGradient @@ -48,6 +50,16 @@ class NEHOGDescriptor : public IFunction public: /** Default constructor */ NEHOGDescriptor(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGDescriptor(const NEHOGDescriptor &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGDescriptor &operator=(const NEHOGDescriptor &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHOGDescriptor(NEHOGDescriptor &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHOGDescriptor &operator=(NEHOGDescriptor &&) = delete; + /** Default destructor */ + ~NEHOGDescriptor(); /** Initialise the function's source, destination, HOG data-object and border mode * * @param[in, out] input Input tensor. 
Data type supported: U8 @@ -63,13 +75,13 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEHOGGradient _gradient; - NEHOGOrientationBinningKernel _orient_bin; - NEHOGBlockNormalizationKernel _block_norm; - Tensor _mag; - Tensor _phase; - Tensor _hog_space; + MemoryGroup _memory_group; + NEHOGGradient _gradient; + std::unique_ptr<NEHOGOrientationBinningKernel> _orient_bin; + std::unique_ptr<NEHOGBlockNormalizationKernel> _block_norm; + Tensor _mag; + Tensor _phase; + Tensor _hog_space; }; } diff --git a/arm_compute/runtime/NEON/functions/NEHOGDetector.h b/arm_compute/runtime/NEON/functions/NEHOGDetector.h index 6400d3c367..89224b62a0 100644 --- a/arm_compute/runtime/NEON/functions/NEHOGDetector.h +++ b/arm_compute/runtime/NEON/functions/NEHOGDetector.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,12 +24,14 @@ #ifndef ARM_COMPUTE_NEHOGDETECTOR_H #define ARM_COMPUTE_NEHOGDETECTOR_H +#include "arm_compute/core/IArray.h" #include "arm_compute/core/IHOG.h" -#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { +class ITensor; +class ITensorInfo; /** Basic function to execute HOG detector based on linear SVM.
This function calls the following NEON kernel: * * -# @ref NEHOGDetectorKernel @@ -38,6 +40,18 @@ namespace arm_compute class NEHOGDetector : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEHOGDetector() = default; + /** Prevent instances of this class from being copied */ + NEHOGDetector(const NEHOGDetector &) = delete; + /** Default move constructor */ + NEHOGDetector(NEHOGDetector &&) = default; + /** Prevent instances of this class from being copied */ + NEHOGDetector &operator=(const NEHOGDetector &) = delete; + /** Default move assignment operator */ + NEHOGDetector &operator=(NEHOGDetector &&) = default; + /** Destructor */ + ~NEHOGDetector(); /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class * * @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is caller's responsibility to clear it. diff --git a/arm_compute/runtime/NEON/functions/NEHOGGradient.h b/arm_compute/runtime/NEON/functions/NEHOGGradient.h index 2d3f934f54..05a16db995 100644 --- a/arm_compute/runtime/NEON/functions/NEHOGGradient.h +++ b/arm_compute/runtime/NEON/functions/NEHOGGradient.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEHOGGRADIENT_H #define ARM_COMPUTE_NEHOGGRADIENT_H -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -38,6 +37,8 @@ namespace arm_compute { class ITensor; +class ICPPKernel; + /** Basic function to calculate the gradient for HOG. 
This function calls the following NEON kernels: * * -# @ref NEDerivative @@ -49,6 +50,16 @@ class NEHOGGradient : public IFunction public: /** Default constructor */ NEHOGGradient(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGGradient(const NEHOGGradient &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGGradient &operator=(const NEHOGGradient &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHOGGradient(NEHOGGradient &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHOGGradient &operator=(NEHOGGradient &&) = delete; + /** Default destructor */ + ~NEHOGGradient(); /** Initialise the function's source, destinations, phase type and border mode * * @param[in, out] input Input tensor. Data type supported: U8. @@ -65,11 +76,11 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEDerivative _derivative; - std::unique_ptr _mag_phase; - Tensor _gx; - Tensor _gy; + MemoryGroup _memory_group; + NEDerivative _derivative; + std::unique_ptr _mag_phase; + Tensor _gx; + Tensor _gy; }; } #endif /*ARM_COMPUTE_NEHOGGRADIENT_H */ diff --git a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h b/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h index ff64afb119..0fb3edd490 100644 --- a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h +++ b/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -27,7 +27,6 @@ #include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h" #include "arm_compute/core/IArray.h" #include "arm_compute/core/IMultiHOG.h" -#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -39,6 +38,9 @@ namespace arm_compute { +class NEHOGOrientationBinningKernel; +class NEHOGBlockNormalizationKernel; + /** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following NEON kernels: * * -# @ref NEHOGGradient @@ -60,8 +62,14 @@ public: NEHOGMultiDetection(std::shared_ptr memory_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEHOGMultiDetection(const NEHOGMultiDetection &) = delete; + /** Default move constructor */ + NEHOGMultiDetection(NEHOGMultiDetection &&) = default; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEHOGMultiDetection &operator=(const NEHOGMultiDetection &) = delete; + /** Default move assignment operator */ + NEHOGMultiDetection &operator=(NEHOGMultiDetection &&) = default; + /** Default destructor */ + ~NEHOGMultiDetection(); /** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression * * @param[in, out] input Input tensor. Data type supported: U8 diff --git a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h b/arm_compute/runtime/NEON/functions/NEHarrisCorners.h index c086e3a7ce..e2dc052afc 100644 --- a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h +++ b/arm_compute/runtime/NEON/functions/NEHarrisCorners.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,8 +26,6 @@ #include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" #include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/Array.h" #include "arm_compute/runtime/IFunction.h" @@ -42,6 +40,8 @@ namespace arm_compute { class ITensor; +class NEFillBorderKernel; +class INEHarrisScoreKernel; using IImage = ITensor; /** Basic function to execute harris corners detection. This function calls the following NEON kernels and functions: @@ -68,6 +68,16 @@ public: * @param[in] memory_manager (Optional) Memory manager. */ NEHarrisCorners(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHarrisCorners(const NEHarrisCorners &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHarrisCorners &operator=(const NEHarrisCorners &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHarrisCorners(NEHarrisCorners &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHarrisCorners &operator=(NEHarrisCorners &&) = delete; + /** Default destructor */ + ~NEHarrisCorners(); /** Initialize the function's source, destination, conv and border_mode. * * @param[in, out] input Source image. Data type supported: U8. 
(Written to only for @p border_mode != UNDEFINED) @@ -94,8 +104,8 @@ private: NENonMaximaSuppression3x3 _non_max_suppr; /**< Non-maxima suppression function */ CPPCornerCandidatesKernel _candidates; /**< Sort kernel */ CPPSortEuclideanDistanceKernel _sort_euclidean; /**< Euclidean distance kernel */ - NEFillBorderKernel _border_gx; /**< Border handler before running harris score */ - NEFillBorderKernel _border_gy; /**< Border handler before running harris score */ + std::unique_ptr _border_gx; /**< Border handler before running harris score */ + std::unique_ptr _border_gy; /**< Border handler before running harris score */ Image _gx; /**< Source image - Gx component */ Image _gy; /**< Source image - Gy component */ Image _score; /**< Source image - Harris score */ diff --git a/arm_compute/runtime/NEON/functions/NEHistogram.h b/arm_compute/runtime/NEON/functions/NEHistogram.h index 716f2e71f9..60766ebcdc 100644 --- a/arm_compute/runtime/NEON/functions/NEHistogram.h +++ b/arm_compute/runtime/NEON/functions/NEHistogram.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,16 +24,19 @@ #ifndef ARM_COMPUTE_NEHISTOGRAM_H #define ARM_COMPUTE_NEHISTOGRAM_H -#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h" #include "arm_compute/runtime/IFunction.h" #include #include #include +#include namespace arm_compute { +class ITensor; class IDistribution1D; +class NEHistogramKernel; +using IImage = ITensor; /** Basic function to run @ref NEHistogramKernel. */ class NEHistogram : public IFunction @@ -41,6 +44,16 @@ class NEHistogram : public IFunction public: /** Default Constructor. 
*/ NEHistogram(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHistogram(const NEHistogram &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHistogram &operator=(const NEHistogram &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHistogram(NEHistogram &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHistogram &operator=(NEHistogram &&) = delete; + /** Default destructor */ + ~NEHistogram(); /** Initialise the kernel's inputs. * * @param[in] input Input image. Data type supported: U8. @@ -52,10 +65,10 @@ public: void run() override; private: - NEHistogramKernel _histogram_kernel; - std::vector _local_hist; - std::vector _window_lut; - size_t _local_hist_size; + std::unique_ptr _histogram_kernel; + std::vector _local_hist; + std::vector _window_lut; + size_t _local_hist_size; /** 256 possible pixel values as we handle only U8 images */ static constexpr unsigned int window_lut_default_size = 256; }; diff --git a/arm_compute/runtime/NEON/functions/NEIm2Col.h b/arm_compute/runtime/NEON/functions/NEIm2Col.h index 3ea9c1cfaf..2f023f44fe 100644 --- a/arm_compute/runtime/NEON/functions/NEIm2Col.h +++ b/arm_compute/runtime/NEON/functions/NEIm2Col.h @@ -26,14 +26,16 @@ #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" -#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" #include "arm_compute/core/Size2D.h" #include "arm_compute/core/Types.h" +#include namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; +class NEIm2ColKernel; /** Basic function to run @ref NEIm2ColKernel */ class NEIm2Col : public IFunction @@ -41,6 +43,16 @@ class NEIm2Col : public IFunction public: /** Default constructor */ NEIm2Col(); + /** Prevent instances of this class from being copied (As this class 
contains pointers) */ + NEIm2Col(const NEIm2Col &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEIm2Col &operator=(const NEIm2Col &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEIm2Col(NEIm2Col &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEIm2Col &operator=(NEIm2Col &&) = delete; + /** Default destructor */ + ~NEIm2Col(); /** Configure the im2col NEON kernel * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], @@ -78,8 +90,8 @@ public: void run() override; private: - NEIm2ColKernel _kernel; - unsigned int _y_dim; + std::unique_ptr<NEIm2ColKernel> _kernel; + unsigned int _y_dim; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEIM2COL_H */ diff --git a/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h index 85a307c2d4..57165c94b4 100644 --- a/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYER_H #define ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYER_H -#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -37,6 +36,7 @@ namespace arm_compute { class ITensor; +class NEInstanceNormalizationLayerKernel; /** Basic function to perform a Instance normalization.
* @@ -48,6 +48,16 @@ class NEInstanceNormalizationLayer : public IFunction public: /** Constructor */ NEInstanceNormalizationLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEInstanceNormalizationLayer(const NEInstanceNormalizationLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEInstanceNormalizationLayer &operator=(const NEInstanceNormalizationLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEInstanceNormalizationLayer(NEInstanceNormalizationLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEInstanceNormalizationLayer &operator=(NEInstanceNormalizationLayer &&) = delete; + /** Default destructor */ + ~NEInstanceNormalizationLayer(); /** Set the input and output tensors. * * @param[in, out] input Source tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization. @@ -75,13 +85,13 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEInstanceNormalizationLayerKernel _normalization_kernel; - bool _is_nchw; - NEPermute _permute_input; - NEPermute _permute_output; - Tensor _permuted_input; - Tensor _permuted_output; + MemoryGroup _memory_group; + std::unique_ptr<NEInstanceNormalizationLayerKernel> _normalization_kernel; + bool _is_nchw; + NEPermute _permute_input; + NEPermute _permute_output; + Tensor _permuted_input; + Tensor _permuted_output; }; } #endif /* ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEIntegralImage.h b/arm_compute/runtime/NEON/functions/NEIntegralImage.h index 6302a7adac..a04105c0b9 100644 --- a/arm_compute/runtime/NEON/functions/NEIntegralImage.h +++ b/arm_compute/runtime/NEON/functions/NEIntegralImage.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,6 +34,18 @@ class ITensor; class NEIntegralImage : public INESimpleFunction { public: + /** Constructor */ + NEIntegralImage() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEIntegralImage(const NEIntegralImage &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEIntegralImage &operator=(const NEIntegralImage &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEIntegralImage(NEIntegralImage &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEIntegralImage &operator=(NEIntegralImage &&) = delete; + /** Default destructor */ + ~NEIntegralImage(); /** Initialise the function's source, destinations and border mode. * * @param[in] input Source tensor. Data type supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h index 66750a5411..173b9d2141 100644 --- a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h +++ b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEL2NORMALIZELAYER_H #define ARM_COMPUTE_NEL2NORMALIZELAYER_H -#include "arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -36,6 +35,7 @@ namespace arm_compute { class ITensor; +class NEL2NormalizeLayerKernel; /** Basic function to perform a L2 normalization on a given axis. 
* @@ -48,6 +48,16 @@ class NEL2NormalizeLayer : public IFunction public: /** Constructor */ NEL2NormalizeLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEL2NormalizeLayer(const NEL2NormalizeLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEL2NormalizeLayer &operator=(const NEL2NormalizeLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEL2NormalizeLayer(NEL2NormalizeLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEL2NormalizeLayer &operator=(NEL2NormalizeLayer &&) = delete; + /** Default destructor */ + ~NEL2NormalizeLayer(); /** Set the input and output tensors. * * @param[in, out] input Source tensor. Data types supported: F16/F32. (Written to only for border_size != 0) @@ -72,10 +82,10 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEReductionOperation _reduce_func; - NEL2NormalizeLayerKernel _normalize_kernel; - Tensor _sumsq; + MemoryGroup _memory_group; + NEReductionOperation _reduce_func; + std::unique_ptr _normalize_kernel; + Tensor _sumsq; }; } #endif /* ARM_COMPUTE_NEL2NORMALIZELAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h index 4a47dfb2cf..ef8defb827 100644 --- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h +++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h @@ -24,18 +24,17 @@ #ifndef ARM_COMPUTE_NELSTMLAYER_H #define ARM_COMPUTE_NELSTMLAYER_H -#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" - #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include 
"arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" +#include "arm_compute/runtime/NEON/functions/NECopy.h" #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" #include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h" #include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" +#include "arm_compute/runtime/NEON/functions/NETranspose.h" #include "arm_compute/runtime/common/LSTMParams.h" namespace arm_compute @@ -49,6 +48,16 @@ class NELSTMLayer : public IFunction public: /** Default constructor */ NELSTMLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELSTMLayer(const NELSTMLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELSTMLayer &operator=(const NELSTMLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELSTMLayer(NELSTMLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELSTMLayer &operator=(NELSTMLayer &&) = delete; + /** Default destructor */ + ~NELSTMLayer(); /** Initialize function's tensors. * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32. 
@@ -158,7 +167,7 @@ private: NEActivationLayer _activation_forget_gate; NEFullyConnectedLayer _fully_connected_cell_state; NEGEMM _gemm_cell_state1; - NETransposeKernel _transpose_cell_state; + NETranspose _transpose_cell_state; NEArithmeticAddition _accum_cell_state1; NEArithmeticAddition _accum_cell_state2; NEPixelWiseMultiplication _pixelwise_mul_cell_state1; @@ -173,8 +182,8 @@ private: NEPixelWiseMultiplication _pixelwise_mul_output_state2; NEFullyConnectedLayer _fully_connected_output_state; NEActivationLayer _projection_clip; - NECopyKernel _copy_cell_state; - NECopyKernel _copy_output; + NECopy _copy_cell_state; + NECopy _copy_output; NEConcatenateLayer _concat_scratch_buffer; NEConcatenateLayer _concat_inputs_forget_gate; NEConcatenateLayer _concat_weights_forget_gate; diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h index 377e173e7d..39fafef773 100644 --- a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h +++ b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -73,6 +73,8 @@ public: NELSTMLayerQuantized &operator=(const NELSTMLayerQuantized &) = delete; /** Default move assignment operator */ NELSTMLayerQuantized &operator=(NELSTMLayerQuantized &&) = default; + /** Default destructor */ + ~NELSTMLayerQuantized(); /** Initialize function's tensors. * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8. diff --git a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h b/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h index 1f317f6dd8..eecd9d59cb 100644 --- a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h +++ b/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. 
+ * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -56,6 +56,16 @@ class NELaplacianPyramid : public IFunction public: /** Constructor */ NELaplacianPyramid(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELaplacianPyramid(const NELaplacianPyramid &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELaplacianPyramid &operator=(const NELaplacianPyramid &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELaplacianPyramid(NELaplacianPyramid &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELaplacianPyramid &operator=(NELaplacianPyramid &&) = delete; + /** Default destructor */ + ~NELaplacianPyramid(); /** Initialise the function's source, destinations and border mode. * * @param[in] input Source tensor. Data type supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h b/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h index cc4aa0876b..20f76455da 100644 --- a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h +++ b/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -62,6 +62,16 @@ class NELaplacianReconstruct : public IFunction public: /** Constructor */ NELaplacianReconstruct(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELaplacianReconstruct(const NELaplacianReconstruct &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELaplacianReconstruct &operator=(const NELaplacianReconstruct &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELaplacianReconstruct(NELaplacianReconstruct &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELaplacianReconstruct &operator=(NELaplacianReconstruct &&) = delete; + /** Default destructor */ + ~NELaplacianReconstruct(); /** Initialise the function's source, destinations and border mode. * * The Output image must have the same size as the first level of the pyramid. 
diff --git a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h index dbcaa297ab..e9f3e93474 100644 --- a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h +++ b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h @@ -26,13 +26,11 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" -#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" -#include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/NEON/functions/NECol2Im.h" +#include "arm_compute/runtime/NEON/functions/NEIm2Col.h" #include "arm_compute/runtime/Tensor.h" #include @@ -40,6 +38,8 @@ namespace arm_compute { class INETensor; +class NEWeightsReshapeKernel; +class NELocallyConnectedMatrixMultiplyKernel; /** Basic function to compute the locally connected layer. This function calls the following NEON kernels: * @@ -61,6 +61,8 @@ public: NELocallyConnectedLayer &operator=(const NELocallyConnectedLayer &) = delete; /** Default move assignment operator */ NELocallyConnectedLayer &operator=(NELocallyConnectedLayer &&) = default; + /** Default destructor */ + ~NELocallyConnectedLayer(); /** Set the input and output tensors. * * @param[in] input Source tensor. 
3 lower dimensions represent a single input [width, height, IFM], @@ -94,16 +96,16 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - NEIm2ColKernel _input_im2col_kernel; - NEWeightsReshapeKernel _weights_reshape_kernel; - NELocallyConnectedMatrixMultiplyKernel _mm_kernel; - NECol2ImKernel _output_col2im_kernel; - Tensor _input_im2col_reshaped; - Tensor _weights_reshaped; - Tensor _gemm_output; - bool _is_prepared; - const ITensor *_original_weights; + MemoryGroup _memory_group; + NEIm2Col _input_im2col; + std::unique_ptr<NEWeightsReshapeKernel> _weights_reshape_kernel; + std::unique_ptr<NELocallyConnectedMatrixMultiplyKernel> _mm_kernel; + NECol2Im _output_col2im; + Tensor _input_im2col_reshaped; + Tensor _weights_reshaped; + Tensor _gemm_output; + bool _is_prepared; + const ITensor *_original_weights; }; } #endif /* ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEMagnitude.h b/arm_compute/runtime/NEON/functions/NEMagnitude.h index 56c88c2125..50935b619d 100644 --- a/arm_compute/runtime/NEON/functions/NEMagnitude.h +++ b/arm_compute/runtime/NEON/functions/NEMagnitude.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited.
* * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_NEMAGNITUDE_H #define ARM_COMPUTE_NEMAGNITUDE_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute @@ -34,6 +35,18 @@ class ITensor; class NEMagnitude : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEMagnitude() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMagnitude(const NEMagnitude &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMagnitude &operator=(const NEMagnitude &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMagnitude(NEMagnitude &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMagnitude &operator=(NEMagnitude &&) = delete; + /** Default destructor */ + ~NEMagnitude(); /** Initialise the kernel's inputs. * * @param[in] input1 First tensor input. Data type supported: S16. diff --git a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h index f13b4bd9e2..5b5bb5cb78 100644 --- a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h +++ b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h @@ -24,14 +24,16 @@ #ifndef ARM_COMPUTE_NEMAXUNPOOLINGLAYER_H #define ARM_COMPUTE_NEMAXUNPOOLINGLAYER_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" +#include namespace arm_compute { class ITensor; +class ITensorInfo; +class NEMemsetKernel; +class NEMaxUnpoolingLayerKernel; /** Function to perform MaxUnpooling. 
This function calls the following NEON kernels: * @@ -43,6 +45,16 @@ class NEMaxUnpoolingLayer : public IFunction public: /** Constructor */ NEMaxUnpoolingLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMaxUnpoolingLayer(const NEMaxUnpoolingLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMaxUnpoolingLayer &operator=(const NEMaxUnpoolingLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMaxUnpoolingLayer(NEMaxUnpoolingLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMaxUnpoolingLayer &operator=(NEMaxUnpoolingLayer &&) = delete; + /** Default destructor */ + ~NEMaxUnpoolingLayer(); /** Set the input and output tensors. * * @note Only supported pool size 2 @@ -70,8 +82,8 @@ public: void run() override; private: - NEMemsetKernel _memset_kernel; - NEMaxUnpoolingLayerKernel _unpooling_layer_kernel; + std::unique_ptr _memset_kernel; + std::unique_ptr _unpooling_layer_kernel; }; } #endif /* ARM_COMPUTE_NEMAXUNPOOLINGLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h b/arm_compute/runtime/NEON/functions/NEMeanStdDev.h index 120f703140..875c3630c1 100644 --- a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h +++ b/arm_compute/runtime/NEON/functions/NEMeanStdDev.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,15 +24,18 @@ #ifndef ARM_COMPUTE_NEMEANSTDDEV_H #define ARM_COMPUTE_NEMEANSTDDEV_H -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h" +#include "arm_compute/core/IMultiImage.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" +#include #include namespace arm_compute { +class NEMeanStdDevKernel; +class NEFillBorderKernel; + /** Basic function to execute mean and std deviation. This function calls the following NEON kernels: * * @ref NEMeanStdDevKernel @@ -43,6 +46,16 @@ class NEMeanStdDev : public IFunction public: /** Default Constructor. */ NEMeanStdDev(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDev(const NEMeanStdDev &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDev &operator=(const NEMeanStdDev &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMeanStdDev(NEMeanStdDev &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMeanStdDev &operator=(NEMeanStdDev &&) = delete; + /** Default destructor */ + ~NEMeanStdDev(); /** Initialise the kernel's inputs and outputs. * * @param[in, out] input Input image. Data types supported: U8. (Written to only for border filling) @@ -55,10 +68,10 @@ public: void run() override; private: - NEMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */ - NEFillBorderKernel _fill_border_kernel; /**< Kernel that fills tensor's borders with zeroes. 
*/ - uint64_t _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */ - uint64_t _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */ + std::unique_ptr _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */ + std::unique_ptr _fill_border_kernel; /**< Kernel that fills tensor's borders with zeroes. */ + uint64_t _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */ + uint64_t _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */ }; } #endif /*ARM_COMPUTE_NEMEANSTDDEV_H */ diff --git a/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h index 132ab8a01b..31e376191c 100644 --- a/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,11 +30,24 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to execute mean and standard deviation normalization by calling @ref NEMeanStdDevNormalizationKernel */ class NEMeanStdDevNormalizationLayer : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEMeanStdDevNormalizationLayer() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDevNormalizationLayer(const NEMeanStdDevNormalizationLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDevNormalizationLayer &operator=(const NEMeanStdDevNormalizationLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMeanStdDevNormalizationLayer(NEMeanStdDevNormalizationLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMeanStdDevNormalizationLayer &operator=(NEMeanStdDevNormalizationLayer &&) = delete; + /** Default destructor */ + ~NEMeanStdDevNormalizationLayer(); /** Initialise the function's input and outputs. * * @note If the output tensor is a nullptr, the normalization will be performed in-place. diff --git a/arm_compute/runtime/NEON/functions/NEMedian3x3.h b/arm_compute/runtime/NEON/functions/NEMedian3x3.h index 8d860e2103..4b5f60382f 100644 --- a/arm_compute/runtime/NEON/functions/NEMedian3x3.h +++ b/arm_compute/runtime/NEON/functions/NEMedian3x3.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h b/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h index caa66a0c16..5959bbb0ad 100644 --- a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h +++ b/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,15 +25,17 @@ #define ARM_COMPUTE_NEMINMAXLOCATION_H #include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h" #include "arm_compute/runtime/Array.h" #include "arm_compute/runtime/IFunction.h" #include +#include namespace arm_compute { class ITensor; +class NEMinMaxKernel; +class NEMinMaxLocationKernel; using IImage = ITensor; /** Basic function to execute min and max location. This function calls the following NEON kernels: @@ -46,6 +48,16 @@ class NEMinMaxLocation : public IFunction public: /** Constructor */ NEMinMaxLocation(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxLocation(const NEMinMaxLocation &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxLocation &operator=(const NEMinMaxLocation &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMinMaxLocation(NEMinMaxLocation &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMinMaxLocation &operator=(NEMinMaxLocation &&) = delete; + /** Default destructor */ + ~NEMinMaxLocation(); /** Initialise the kernel's inputs and outputs. * * @param[in] input Input image. Data types supported: U8/S16/F32. 
@@ -64,8 +76,8 @@ public: void run() override; private: - NEMinMaxKernel _min_max; /**< Kernel that performs min/max */ - NEMinMaxLocationKernel _min_max_loc; /**< Kernel that extracts min/max locations */ + std::unique_ptr _min_max; /**< Kernel that performs min/max */ + std::unique_ptr _min_max_loc; /**< Kernel that extracts min/max locations */ }; } #endif /*ARM_COMPUTE_NEMINMAXLOCATION_H */ diff --git a/arm_compute/runtime/NEON/functions/NENonLinearFilter.h b/arm_compute/runtime/NEON/functions/NENonLinearFilter.h index d2a85837fd..fe1b190db2 100644 --- a/arm_compute/runtime/NEON/functions/NENonLinearFilter.h +++ b/arm_compute/runtime/NEON/functions/NENonLinearFilter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h b/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h index 07d4b16cf1..bad633afac 100644 --- a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h +++ b/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h index fcdba12046..6519f9b4e6 100644 --- a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h @@ -26,8 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -39,6 +37,7 @@ namespace arm_compute { class ITensor; +class NENormalizationLayerKernel; /** Basic function to compute a normalization layer. This function calls the following NEON kernels: * @@ -52,6 +51,16 @@ class NENormalizationLayer : public IFunction public: /** Default constructor */ NENormalizationLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENormalizationLayer(const NENormalizationLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENormalizationLayer &operator=(const NENormalizationLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NENormalizationLayer(NENormalizationLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NENormalizationLayer &operator=(NENormalizationLayer &&) = delete; + /** Default destructor */ + ~NENormalizationLayer(); /** Set the input and output tensors. * * @param[in] input Source tensor. 
3 lower dims represent a single input with dimensions [width, height, IFM], @@ -75,10 +84,10 @@ public: void run() override; private: - MemoryGroup _memory_group; /**< Function memory group */ - NENormalizationLayerKernel _norm_kernel; /**< Normalization layer kernel */ - NEPixelWiseMultiplication _multiply_f; /**< Pixel multiplication function */ - Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */ + MemoryGroup _memory_group; /**< Function memory group */ + std::unique_ptr _norm_kernel; /**< Normalization layer kernel */ + NEPixelWiseMultiplication _multiply_f; /**< Pixel multiplication function */ + Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */ }; } #endif /* ARM_COMPUTE_NENORMALIZATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEOpticalFlow.h b/arm_compute/runtime/NEON/functions/NEOpticalFlow.h index 141ee7ea41..a9f985a422 100644 --- a/arm_compute/runtime/NEON/functions/NEOpticalFlow.h +++ b/arm_compute/runtime/NEON/functions/NEOpticalFlow.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,7 +25,7 @@ #define ARM_COMPUTE_NEOPTICALFLOW_H #include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" +#include "arm_compute/core/Types.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/Array.h" #include "arm_compute/runtime/IFunction.h" @@ -41,6 +41,7 @@ namespace arm_compute { class Pyramid; +class NELKTrackerKernel; /** Array of LK Internel Keypoints */ using LKInternalKeypointArray = Array; @@ -62,6 +63,8 @@ public: NEOpticalFlow(const NEOpticalFlow &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEOpticalFlow &operator=(const NEOpticalFlow &) = delete; + /** Default destructor */ + ~NEOpticalFlow(); /** Initialise the function input and output * * @param[in] old_pyramid Pointer to the pyramid for the old tensor. Data type supported U8 @@ -86,17 +89,17 @@ public: void run() override; private: - MemoryGroup _memory_group; - std::vector _func_scharr; - std::vector _kernel_tracker; - std::vector _scharr_gx; - std::vector _scharr_gy; - IKeyPointArray *_new_points; - const IKeyPointArray *_new_points_estimates; - const IKeyPointArray *_old_points; - LKInternalKeypointArray _new_points_internal; - LKInternalKeypointArray _old_points_internal; - unsigned int _num_levels; + MemoryGroup _memory_group; + std::vector _func_scharr; + std::vector> _kernel_tracker; + std::vector _scharr_gx; + std::vector _scharr_gy; + IKeyPointArray *_new_points; + const IKeyPointArray *_new_points_estimates; + const IKeyPointArray *_old_points; + LKInternalKeypointArray _new_points_internal; + LKInternalKeypointArray _old_points_internal; + unsigned int _num_levels; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEOPTICALFLOW_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPReluLayer.h b/arm_compute/runtime/NEON/functions/NEPReluLayer.h index 756058b5ec..358e633000 100644 --- 
a/arm_compute/runtime/NEON/functions/NEPReluLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPReluLayer.h @@ -31,6 +31,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; namespace experimental { diff --git a/arm_compute/runtime/NEON/functions/NEPadLayer.h b/arm_compute/runtime/NEON/functions/NEPadLayer.h index fcb7c36312..3fdbb0d73c 100644 --- a/arm_compute/runtime/NEON/functions/NEPadLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPadLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,13 +29,15 @@ #include "arm_compute/runtime/NEON/functions/NEStridedSlice.h" #include "arm_compute/runtime/SubTensor.h" -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" -#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/Tensor.h" +#include namespace arm_compute { +class NECopyKernel; +class NEPadLayerKernel; + /** Basic function to pad a tensor. This function calls the following NEON functions/kernels: * * - For padding mode = PaddingMode::CONSTANT: @@ -49,8 +51,18 @@ namespace arm_compute class NEPadLayer : public IFunction { public: - /** Default constructor*/ + /** Default Constructor */ NEPadLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPadLayer(const NEPadLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPadLayer &operator=(const NEPadLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEPadLayer(NEPadLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEPadLayer &operator=(NEPadLayer &&) = delete; + /** Default destructor */ + ~NEPadLayer(); /** Initialize the function * * @param[in] input Source tensor. 
Data types supported: All. @@ -97,15 +109,15 @@ private: void configure_reflect_symmetric_mode(ITensor *input, ITensor *output); private: - NECopyKernel _copy_kernel; - NEPadLayerKernel _pad_kernel; - PaddingMode _mode; - PaddingList _padding; - uint32_t _num_dimensions; - std::vector _slice_functions; - std::vector _concat_functions; - std::vector _slice_results; - std::vector _concat_results; + std::unique_ptr _copy_kernel; + std::unique_ptr _pad_kernel; + PaddingMode _mode; + PaddingList _padding; + uint32_t _num_dimensions; + std::vector _slice_functions; + std::vector _concat_functions; + std::vector _slice_results; + std::vector _concat_results; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEPADLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPermute.h b/arm_compute/runtime/NEON/functions/NEPermute.h index 3be42c8346..ef8854b360 100644 --- a/arm_compute/runtime/NEON/functions/NEPermute.h +++ b/arm_compute/runtime/NEON/functions/NEPermute.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,6 +32,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NEPermuteKernel */ class NEPermute : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEPhase.h b/arm_compute/runtime/NEON/functions/NEPhase.h index c492073e22..626023c099 100644 --- a/arm_compute/runtime/NEON/functions/NEPhase.h +++ b/arm_compute/runtime/NEON/functions/NEPhase.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,11 +24,13 @@ #ifndef ARM_COMPUTE_NEPHASE_H #define ARM_COMPUTE_NEPHASE_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEMagnitudePhaseKernel */ class NEPhase : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h index e1072980cf..91cf44ff2e 100644 --- a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h +++ b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h @@ -31,6 +31,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; namespace experimental { diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h index 000c754ec8..b45290fb46 100644 --- a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h @@ -26,13 +26,15 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h" #include "arm_compute/core/Types.h" +#include namespace arm_compute { class ITensor; +class ITensorInfo; +class NEPoolingLayerKernel; +class NEFillBorderKernel; /** Basic function to simulate a pooling layer with the specified pooling operation. 
This function calls the following NEON kernels: * @@ -44,6 +46,16 @@ class NEPoolingLayer : public IFunction public: /** Constructor */ NEPoolingLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPoolingLayer(const NEPoolingLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPoolingLayer &operator=(const NEPoolingLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEPoolingLayer(NEPoolingLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEPoolingLayer &operator=(NEPoolingLayer &&) = delete; + /** Default destructor */ + ~NEPoolingLayer(); /** Set the input and output tensors. * * @note F16 is supported for pool sizes 2 and 3 only @@ -71,10 +83,10 @@ public: void run() override; private: - NEPoolingLayerKernel _pooling_layer_kernel; - NEFillBorderKernel _border_handler; - bool _is_global_pooling_layer; - DataLayout _data_layout; + std::unique_ptr _pooling_layer_kernel; + std::unique_ptr _border_handler; + bool _is_global_pooling_layer; + DataLayout _data_layout; }; } #endif /* ARM_COMPUTE_NEPOOLINGLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h index d4bb42fd07..3cc79fa28e 100644 --- a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,13 +24,13 @@ #ifndef ARM_COMPUTE_NEPRIORBOXLAYER_H #define ARM_COMPUTE_NEPRIORBOXLAYER_H -#include "arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEPriorBoxLayerKernel. */ class NEPriorBoxLayer : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h index 7c572de874..17ad5a354b 100644 --- a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h +++ b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h @@ -24,24 +24,27 @@ #ifndef ARM_COMPUTE_NEQLSTMLAYER_H #define ARM_COMPUTE_NEQLSTMLAYER_H -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h" -#include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" +#include "arm_compute/runtime/NEON/functions/NECopy.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h" #include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" #include "arm_compute/runtime/NEON/functions/NETranspose.h" +#include "support/MemorySupport.h" #include "arm_compute/runtime/common/LSTMParams.h" +#include namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; +class NEQLSTMLayerNormalizationKernel; +class NEGEMMLowpMatrixAReductionKernel; /** Basic function to run @ref NEQLSTMLayer * @@ -70,6 +73,8 @@ public: NEQLSTMLayer &operator=(const NEQLSTMLayer &) = delete; /** 
Default move assignment operator */ NEQLSTMLayer &operator=(NEQLSTMLayer &&) = default; + /** Default destructor */ + ~NEQLSTMLayer(); /** Initialize function's tensors. * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED. @@ -204,7 +209,7 @@ private: Tensor *outstage_res, float gemmlowp_scale, const TensorInfo &mm_res_info, const TensorInfo &outstage_tensor_info); - MemoryGroup _memory_group{}; + MemoryGroup _memory_group; /** A small internel kernel do the copy between two tensors */ class TensorCopyKernel @@ -217,6 +222,8 @@ private: Window _window{}; public: + /** Destructor */ + ~TensorCopyKernel(); /** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayer::TensorCopyKernel * * @param[in] src Source tensor info. @@ -236,79 +243,79 @@ private: }; // Functions used - NETranspose _transpose_input_to_forget_weights{}; - NETranspose _transpose_input_to_cell_weights{}; - NETranspose _transpose_input_to_output_weights{}; - NETranspose _transpose_input_to_input_weights{}; - NETranspose _transpose_recurrent_to_forget_weights{}; - NETranspose _transpose_recurrent_to_cell_weights{}; - NETranspose _transpose_recurrent_to_output_weights{}; - NETranspose _transpose_recurrent_to_input_weights{}; - NETranspose _transpose_projection_weights{}; - NEGEMMLowpMatrixAReductionKernel _input_to_input_reduction{}; - NEGEMMLowpMatrixAReductionKernel _recurrent_to_input_reduction{}; - NEGEMMLowpMatrixAReductionKernel _input_to_forget_reduction{}; - NEGEMMLowpMatrixAReductionKernel _recurrent_to_forget_reduction{}; - NEGEMMLowpMatrixAReductionKernel _input_to_cell_reduction{}; - NEGEMMLowpMatrixAReductionKernel _recurrent_to_cell_reduction{}; - NEGEMMLowpMatrixAReductionKernel _input_to_output_reduction{}; - NEGEMMLowpMatrixAReductionKernel _recurrent_to_output_reduction{}; - NEGEMMLowpMatrixAReductionKernel _projection_reduction{}; - NEArithmeticAddition 
_projection_bias_add{}; - NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget{}; - NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{}; - NEPixelWiseMultiplication _pixelwise_mul_cell_to_forget{}; - NEGEMMLowpOutputStage _input_to_forget_outstage{}; - NEGEMMLowpOutputStage _recurrent_to_forget_outstage{}; - NEGEMMLowpOutputStage _cell_to_forget_outstage{}; - NEArithmeticAddition _accumulate_input_recurrent_forget{}; - NEArithmeticAddition _accumulate_cell_forget{}; - NEActivationLayer _forget_gate_sigmoid{}; - NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell{}; - NEGEMMLowpOutputStage _input_to_cell_outstage{}; - NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{}; - NEGEMMLowpOutputStage _recurrent_to_cell_outstage{}; - NEArithmeticAddition _accumulate_input_recurrent_modulation{}; - NEActivationLayer _cell_gate_tanh{}; - NEArithmeticSubtraction _input_gate_sub{}; - NEGEMMLowpMatrixMultiplyCore _mm_input_to_input{}; - NEGEMMLowpOutputStage _input_to_input_outstage{}; - NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{}; - NEGEMMLowpOutputStage _recurrent_to_input_outstage{}; - NEArithmeticAddition _accumulate_input_recurrent_input{}; - NEPixelWiseMultiplication _pixelwise_mul_cell_to_input{}; - NEGEMMLowpOutputStage _cell_to_input_outstage{}; - NEArithmeticAddition _accumulate_cell_input{}; - NEActivationLayer _input_gate_sigmoid{}; - NEPixelWiseMultiplication _pixelwise_mul_forget_cell{}; - NEPixelWiseMultiplication _pixelwise_mul_input_cell{}; - NEArithmeticAddition _add_forget_cell{}; - NEActivationLayer _cell_clip{}; - NEGEMMLowpMatrixMultiplyCore _mm_input_to_output{}; - NEGEMMLowpOutputStage _input_to_output_outstage{}; - NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{}; - NEGEMMLowpOutputStage _recurrent_to_output_outstage{}; - NEArithmeticAddition _accumulate_input_recurrent_output{}; - NEPixelWiseMultiplication _pixelwise_mul_cell_to_output{}; - NEGEMMLowpOutputStage _cell_to_output_outstage{}; - NEArithmeticAddition _accumulate_cell_to_output{}; 
- NEActivationLayer _output_gate_sigmoid{}; - NEActivationLayer _hidden_tanh{}; - NEPixelWiseMultiplication _pixelwise_mul_hidden{}; - NEGEMMLowpOutputStage _hidden_outstage{}; - NEGEMMLowpMatrixMultiplyCore _mm_projection{}; - NEGEMMLowpOutputStage _projection_outstage{}; - NEArithmeticAddition _accumulate_projection{}; - NEActivationLayer _projection_clip{}; + NETranspose _transpose_input_to_forget_weights; + NETranspose _transpose_input_to_cell_weights; + NETranspose _transpose_input_to_output_weights; + NETranspose _transpose_input_to_input_weights; + NETranspose _transpose_recurrent_to_forget_weights; + NETranspose _transpose_recurrent_to_cell_weights; + NETranspose _transpose_recurrent_to_output_weights; + NETranspose _transpose_recurrent_to_input_weights; + NETranspose _transpose_projection_weights; + std::unique_ptr _input_to_input_reduction; + std::unique_ptr _recurrent_to_input_reduction; + std::unique_ptr _input_to_forget_reduction; + std::unique_ptr _recurrent_to_forget_reduction; + std::unique_ptr _input_to_cell_reduction; + std::unique_ptr _recurrent_to_cell_reduction; + std::unique_ptr _input_to_output_reduction; + std::unique_ptr _recurrent_to_output_reduction; + std::unique_ptr _projection_reduction; + NEArithmeticAddition _projection_bias_add; + NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget; + NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget; + NEPixelWiseMultiplication _pixelwise_mul_cell_to_forget; + NEGEMMLowpOutputStage _input_to_forget_outstage; + NEGEMMLowpOutputStage _recurrent_to_forget_outstage; + NEGEMMLowpOutputStage _cell_to_forget_outstage; + NEArithmeticAddition _accumulate_input_recurrent_forget; + NEArithmeticAddition _accumulate_cell_forget; + NEActivationLayer _forget_gate_sigmoid; + NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell; + NEGEMMLowpOutputStage _input_to_cell_outstage; + NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell; + NEGEMMLowpOutputStage _recurrent_to_cell_outstage; + NEArithmeticAddition 
_accumulate_input_recurrent_modulation; + NEActivationLayer _cell_gate_tanh; + NEArithmeticSubtraction _input_gate_sub; + NEGEMMLowpMatrixMultiplyCore _mm_input_to_input; + NEGEMMLowpOutputStage _input_to_input_outstage; + NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input; + NEGEMMLowpOutputStage _recurrent_to_input_outstage; + NEArithmeticAddition _accumulate_input_recurrent_input; + NEPixelWiseMultiplication _pixelwise_mul_cell_to_input; + NEGEMMLowpOutputStage _cell_to_input_outstage; + NEArithmeticAddition _accumulate_cell_input; + NEActivationLayer _input_gate_sigmoid; + NEPixelWiseMultiplication _pixelwise_mul_forget_cell; + NEPixelWiseMultiplication _pixelwise_mul_input_cell; + NEArithmeticAddition _add_forget_cell; + NEActivationLayer _cell_clip; + NEGEMMLowpMatrixMultiplyCore _mm_input_to_output; + NEGEMMLowpOutputStage _input_to_output_outstage; + NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output; + NEGEMMLowpOutputStage _recurrent_to_output_outstage; + NEArithmeticAddition _accumulate_input_recurrent_output; + NEPixelWiseMultiplication _pixelwise_mul_cell_to_output; + NEGEMMLowpOutputStage _cell_to_output_outstage; + NEArithmeticAddition _accumulate_cell_to_output; + NEActivationLayer _output_gate_sigmoid; + NEActivationLayer _hidden_tanh; + NEPixelWiseMultiplication _pixelwise_mul_hidden; + NEGEMMLowpOutputStage _hidden_outstage; + NEGEMMLowpMatrixMultiplyCore _mm_projection; + NEGEMMLowpOutputStage _projection_outstage; + NEArithmeticAddition _accumulate_projection; + NEActivationLayer _projection_clip; - TensorCopyKernel _projection_bias_copy{}; - TensorCopyKernel _projection_output_to_accumulate_copy{}; - TensorCopyKernel _projection_accumulate_to_output_copy{}; - TensorCopyKernel _hidden_to_output_copy{}; + TensorCopyKernel _projection_bias_copy; + TensorCopyKernel _projection_output_to_accumulate_copy; + TensorCopyKernel _projection_accumulate_to_output_copy; + TensorCopyKernel _hidden_to_output_copy; - std::array _layer_norms{ {} }; + 
std::array, _layer_norm_count> _layer_norms; - NECopyKernel _copy_output{}; + NECopy _copy_output; // Tensor pointers const ITensor *_input_to_input_weights @@ -324,8 +331,8 @@ private: const ITensor *_recurrent_to_cell_weights{ nullptr }; const ITensor *_recurrent_to_output_weights{ nullptr }; const ITensor *_projection_weights{ nullptr }; - std::array _layer_norm_weights{ {} }; - std::array _layer_norm_bias{ {} }; + std::array _layer_norm_weights{}; + std::array _layer_norm_bias{}; using LayerNormIndexType = typename std::underlying_type::type; inline LayerNormIndexType getGateIndex(LayerNormGate g) @@ -353,32 +360,13 @@ private: return _layer_norm_bias[getGateIndex(g)]; } - inline NEQLSTMLayerNormalizationKernel &get_layer_norm(LayerNormGate g) + inline std::unique_ptr &get_layer_norm(LayerNormGate g) { return _layer_norms[getGateIndex(g)]; } - inline void configure_layer_norm(LayerNormGate g, const ITensor *in) - { - ARM_COMPUTE_ERROR_ON(!_has_layer_norm); - - Tensor &out = get_layer_norm_output(g); - _memory_group.manage(&out); - out.allocator()->init(*(in->info())); - - get_layer_norm(g).configure(in, &out, get_layer_norm_weight(g), get_layer_norm_bias(g)); - } - - inline static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias) - { - // Output quantization scale will be different, but ignored here - // since it will be configured at configure() stage. 
- const TensorInfo out - { - in - }; - return NEQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias); - } + void configure_layer_norm(LayerNormGate g, const ITensor *in); + static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias); // Temporary tensors Tensor _input_to_forget_weights_transposed{ nullptr }; @@ -434,7 +422,7 @@ private: Tensor _projection_out_res{ nullptr }; Tensor _projection_accumulate_res{ nullptr }; Tensor _ones{ nullptr }; - std::array _layer_norm_output{ {} }; + std::array _layer_norm_output{}; inline Tensor &get_layer_norm_output(LayerNormGate g) { diff --git a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h index 266b3df87a..36302f4741 100644 --- a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h @@ -26,7 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" #include "arm_compute/core/Types.h" @@ -34,6 +33,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to simulate a quantization layer. This function calls the following NEON kernels: * @@ -44,8 +44,6 @@ class ITensor; class NEQuantizationLayer : public INESimpleFunctionNoBorder { public: - /** Default constructor */ - NEQuantizationLayer() = default; /** Set the input and output tensors. * * @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. 
diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h index 12e3ef9c57..74fdc59af6 100644 --- a/arm_compute/runtime/NEON/functions/NERNNLayer.h +++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_NERNNLAYER_H #define ARM_COMPUTE_NERNNLAYER_H -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" - #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" @@ -36,6 +34,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class NECopyKernel; /** Basic function to run @ref NERNNLayer */ class NERNNLayer : public IFunction @@ -51,6 +50,8 @@ public: NERNNLayer &operator=(const NERNNLayer &) = delete; /** Default move assignment operator */ NERNNLayer &operator=(NERNNLayer &&) = default; + /** Default destructor */ + ~NERNNLayer(); /** Initialize the function * * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. 
Data types supported: F16/F32 @@ -82,16 +83,16 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - NEGEMM _gemm_state_f; - NEArithmeticAddition _add_f; - NEActivationLayer _activation; - NEFullyConnectedLayer _fully_connected; - NECopyKernel _copy_kernel; - Tensor _fully_connected_out; - Tensor _gemm_output; - Tensor _add_output; - bool _is_prepared; + MemoryGroup _memory_group; + NEGEMM _gemm_state_f; + NEArithmeticAddition _add_f; + NEActivationLayer _activation; + NEFullyConnectedLayer _fully_connected; + std::unique_ptr _copy_kernel; + Tensor _fully_connected_out; + Tensor _gemm_output; + Tensor _add_output; + bool _is_prepared; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NERNNLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h b/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h index 3e8db55f99..1d992f53df 100644 --- a/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h +++ b/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h @@ -24,11 +24,13 @@ #ifndef ARM_COMPUTE_NEROIALIGNLAYER_H #define ARM_COMPUTE_NEROIALIGNLAYER_H -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEROIAlignLayerKernel. * @@ -36,7 +38,7 @@ class ITensor; * -# @ref NEROIAlignLayerKernel * */ -class NEROIAlignLayer : public INESimpleFunction +class NEROIAlignLayer : public INESimpleFunctionNoBorder { public: /** Set the input and output tensors. diff --git a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h index 08885d0e58..0b9b4f75fc 100644 --- a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h +++ b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -27,11 +27,13 @@ #include "arm_compute/runtime/IFunction.h" #include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h" +#include namespace arm_compute { class ITensor; +class NEROIPoolingLayerKernel; +class ROIPoolingLayerInfo; /** Basic function to run @ref NEROIPoolingLayerKernel. * @@ -44,6 +46,16 @@ class NEROIPoolingLayer : public IFunction public: /** Constructor */ NEROIPoolingLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEROIPoolingLayer(const NEROIPoolingLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEROIPoolingLayer &operator=(const NEROIPoolingLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEROIPoolingLayer(NEROIPoolingLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEROIPoolingLayer &operator=(NEROIPoolingLayer &&) = delete; + /** Default destructor */ + ~NEROIPoolingLayer(); /** Set the input and output tensors. * * @param[in] input Source tensor. Data types supported: F32. @@ -63,7 +75,7 @@ public: void run() override; private: - NEROIPoolingLayerKernel _roi_kernel; + std::unique_ptr _roi_kernel; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEROIPOOLINGLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NERange.h b/arm_compute/runtime/NEON/functions/NERange.h index 04889d4d6f..844a47d2ae 100644 --- a/arm_compute/runtime/NEON/functions/NERange.h +++ b/arm_compute/runtime/NEON/functions/NERange.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,13 +24,15 @@ #ifndef ARM_COMPUTE_NERANGE_H #define ARM_COMPUTE_NERANGE_H -#include "arm_compute/core/NEON/kernels/NERangeKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" +#include namespace arm_compute { class ITensor; +class ITensorInfo; +class NERangeKernel; /** Basic function to run @ref NERangeKernel * @@ -42,6 +44,16 @@ class NERange : public IFunction public: /** Default constructor */ NERange(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NERange(const NERange &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NERange &operator=(const NERange &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NERange(NERange &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NERange &operator=(NERange &&) = delete; + /** Default destructor */ + ~NERange(); /** Initialize the kernel's start, end, step and output tensor. * * @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. 
@@ -65,7 +77,7 @@ public: void run() override; private: - NERangeKernel _kernel; + std::unique_ptr _kernel; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NERANGE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEReduceMean.h b/arm_compute/runtime/NEON/functions/NEReduceMean.h index eee3f7f799..89cd09812b 100644 --- a/arm_compute/runtime/NEON/functions/NEReduceMean.h +++ b/arm_compute/runtime/NEON/functions/NEReduceMean.h @@ -26,7 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h" @@ -43,6 +42,16 @@ class NEReduceMean : public IFunction public: /** Constructor */ NEReduceMean(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReduceMean(const NEReduceMean &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReduceMean &operator=(const NEReduceMean &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEReduceMean(NEReduceMean &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEReduceMean &operator=(NEReduceMean &&) = delete; + /** Default destructor */ + ~NEReduceMean(); /** Configure kernel * * @note Supported tensor rank: up to 4 diff --git a/arm_compute/runtime/NEON/functions/NEReductionOperation.h b/arm_compute/runtime/NEON/functions/NEReductionOperation.h index aafccb00e3..8186e2e355 100644 --- a/arm_compute/runtime/NEON/functions/NEReductionOperation.h +++ b/arm_compute/runtime/NEON/functions/NEReductionOperation.h @@ -26,13 +26,14 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h" #include 
"arm_compute/runtime/NEON/functions/NEReshapeLayer.h" #include "arm_compute/runtime/Tensor.h" +#include namespace arm_compute { class ITensor; +class NEReductionOperationKernel; /** Basic function to simulate a reduction operation. This function calls the following NEON kernels: * @@ -45,6 +46,16 @@ class NEReductionOperation : public IFunction public: /** Default constructor */ NEReductionOperation(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReductionOperation(const NEReductionOperation &) = delete; + /** Default move constructor */ + NEReductionOperation(NEReductionOperation &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReductionOperation &operator=(const NEReductionOperation &) = delete; + /** Default move assignment operator */ + NEReductionOperation &operator=(NEReductionOperation &&) = default; + /** Default destructor */ + ~NEReductionOperation(); /** Set the input and output tensors. * * @param[in, out] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. 
(Written to only for border_size != 0) @@ -71,13 +82,13 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEReductionOperationKernel _reduction_kernel; - NEReshapeLayer _reshape; - Tensor _output_internal; - size_t _window_split; - int _reduction_axis; - bool _is_reshape_required; + MemoryGroup _memory_group; + std::unique_ptr _reduction_kernel; + NEReshapeLayer _reshape; + Tensor _output_internal; + size_t _window_split; + int _reduction_axis; + bool _is_reshape_required; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEREDUCTIONOPERATION_H */ diff --git a/arm_compute/runtime/NEON/functions/NERemap.h b/arm_compute/runtime/NEON/functions/NERemap.h index f087bd2e3c..d870ce6e51 100644 --- a/arm_compute/runtime/NEON/functions/NERemap.h +++ b/arm_compute/runtime/NEON/functions/NERemap.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEReorgLayer.h b/arm_compute/runtime/NEON/functions/NEReorgLayer.h index 19385e1b74..f76d1d252c 100644 --- a/arm_compute/runtime/NEON/functions/NEReorgLayer.h +++ b/arm_compute/runtime/NEON/functions/NEReorgLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -31,6 +31,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NEReorgLayerKernel */ class NEReorgLayer : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h index 2ca6660139..641a96e0f9 100644 --- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h +++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NERESHAPELAYER_H #define ARM_COMPUTE_NERESHAPELAYER_H -#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/INEOperator.h" @@ -81,6 +80,18 @@ namespace experimental class NEReshape : public INEOperator { public: + /** Default Constructor */ + NEReshape() = default; + /** Default Destructor */ + ~NEReshape(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReshape(const NEReshape &) = delete; + /** Default move constructor */ + NEReshape(NEReshape &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReshape &operator=(const NEReshape &) = delete; + /** Default move assignment operator */ + NEReshape &operator=(NEReshape &&); /** Initialise the kernel's inputs and outputs * * @param[in] input Input tensor info. Data type supported: All diff --git a/arm_compute/runtime/NEON/functions/NEReverse.h b/arm_compute/runtime/NEON/functions/NEReverse.h index 7a4566db28..2048dafcb5 100644 --- a/arm_compute/runtime/NEON/functions/NEReverse.h +++ b/arm_compute/runtime/NEON/functions/NEReverse.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited.
* * SPDX-License-Identifier: MIT * @@ -31,6 +31,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEReverseKernel */ class NEReverse : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEScale.h b/arm_compute/runtime/NEON/functions/NEScale.h index 4063e558a6..fceda83510 100644 --- a/arm_compute/runtime/NEON/functions/NEScale.h +++ b/arm_compute/runtime/NEON/functions/NEScale.h @@ -24,7 +24,7 @@ #ifndef ARM_COMPUTE_NESCALEIMAGE_H #define ARM_COMPUTE_NESCALEIMAGE_H -#include "arm_compute/core/NEON/kernels/NEScaleKernel.h" +#include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" #include "arm_compute/runtime/Tensor.h" diff --git a/arm_compute/runtime/NEON/functions/NESelect.h b/arm_compute/runtime/NEON/functions/NESelect.h index 258ac5d64d..c66fbfa7d4 100644 --- a/arm_compute/runtime/NEON/functions/NESelect.h +++ b/arm_compute/runtime/NEON/functions/NESelect.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,15 +25,16 @@ #define ARM_COMPUTE_NESELECT_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NESelect */ -class NESelect : public INESimpleFunction +class NESelect : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's inputs and output. diff --git a/arm_compute/runtime/NEON/functions/NESobel3x3.h b/arm_compute/runtime/NEON/functions/NESobel3x3.h index 4dbdfd223b..a0b8f6296f 100644 --- a/arm_compute/runtime/NEON/functions/NESobel3x3.h +++ b/arm_compute/runtime/NEON/functions/NESobel3x3.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. 
+ * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NESobel5x5.h b/arm_compute/runtime/NEON/functions/NESobel5x5.h index b5365bc1b7..b17f9e7972 100644 --- a/arm_compute/runtime/NEON/functions/NESobel5x5.h +++ b/arm_compute/runtime/NEON/functions/NESobel5x5.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_NESOBEL5x5_H #define ARM_COMPUTE_NESOBEL5x5_H -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NESobel5x5Kernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -38,6 +36,9 @@ namespace arm_compute { class ITensor; +class NESobel5x5HorKernel; +class NESobel5x5VertKernel; +class NEFillBorderKernel; /** Basic function to execute sobel 5x5 filter. This function calls the following NEON kernels: * @@ -51,6 +52,16 @@ class NESobel5x5 : public IFunction public: /** Default constructor */ NESobel5x5(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel5x5(const NESobel5x5 &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel5x5 &operator=(const NESobel5x5 &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NESobel5x5(NESobel5x5 &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NESobel5x5 &operator=(NESobel5x5 &&) = delete; + /** Default destructor */ + ~NESobel5x5(); /** Initialise the function's source, destinations and border mode. * * @note At least one of output_x or output_y must be not NULL. 
@@ -68,12 +79,12 @@ public: void run() override; protected: - MemoryGroup _memory_group; /**< Function memory group */ - NESobel5x5HorKernel _sobel_hor; /**< Sobel Horizontal 5x5 kernel */ - NESobel5x5VertKernel _sobel_vert; /**< Sobel Vertical 5x5 kernel */ - Tensor _tmp_x; /**< Temporary buffer for Sobel X */ - Tensor _tmp_y; /**< Temporary buffer for Sobel Y */ - NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */ + MemoryGroup _memory_group; /**< Function memory group */ + std::unique_ptr _sobel_hor; /**< Sobel Horizontal 5x5 kernel */ + std::unique_ptr _sobel_vert; /**< Sobel Vertical 5x5 kernel */ + Tensor _tmp_x; /**< Temporary buffer for Sobel X */ + Tensor _tmp_y; /**< Temporary buffer for Sobel Y */ + std::unique_ptr _border_handler; /**< Kernel to handle tensor borders */ }; } #endif /*ARM_COMPUTE_NESOBEL5x5_H */ diff --git a/arm_compute/runtime/NEON/functions/NESobel7x7.h b/arm_compute/runtime/NEON/functions/NESobel7x7.h index 925444d85b..cd0510a997 100644 --- a/arm_compute/runtime/NEON/functions/NESobel7x7.h +++ b/arm_compute/runtime/NEON/functions/NESobel7x7.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_NESOBEL7x7_H #define ARM_COMPUTE_NESOBEL7x7_H -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NESobel7x7Kernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -38,6 +36,9 @@ namespace arm_compute { class ITensor; +class NESobel7x7HorKernel; +class NESobel7x7VertKernel; +class NEFillBorderKernel; /** Basic function to execute sobel 7x7 filter. 
This function calls the following NEON kernels: * @@ -51,6 +52,16 @@ class NESobel7x7 : public IFunction public: /** Default constructor */ NESobel7x7(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel7x7(const NESobel7x7 &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel7x7 &operator=(const NESobel7x7 &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NESobel7x7(NESobel7x7 &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NESobel7x7 &operator=(NESobel7x7 &&) = delete; + /** Default destructor */ + ~NESobel7x7(); /** Initialise the function's source, destinations and border mode. * * @note At least one of output_x or output_y must be not NULL. @@ -68,12 +79,12 @@ public: void run() override; protected: - MemoryGroup _memory_group; /**< Function memory group */ - NESobel7x7HorKernel _sobel_hor; /**< Sobel Horizontal 7x7 kernel */ - NESobel7x7VertKernel _sobel_vert; /**< Sobel Vertical 7x7 kernel */ - Tensor _tmp_x; /**< Temporary buffer for Sobel X */ - Tensor _tmp_y; /**< Temporary buffer for Sobel Y */ - NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */ + MemoryGroup _memory_group; /**< Function memory group */ + std::unique_ptr _sobel_hor; /**< Sobel Horizontal 7x7 kernel */ + std::unique_ptr _sobel_vert; /**< Sobel Vertical 7x7 kernel */ + Tensor _tmp_x; /**< Temporary buffer for Sobel X */ + Tensor _tmp_y; /**< Temporary buffer for Sobel Y */ + std::unique_ptr _border_handler; /**< Kernel to handle tensor borders */ }; } #endif /*ARM_COMPUTE_NESOBEL7x7_H */ diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h index 20b20201d5..40fa38afde 100644 --- 
a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h +++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h @@ -24,16 +24,19 @@ #ifndef ARM_COMPUTE_NESOFTMAXLAYER_H #define ARM_COMPUTE_NESOFTMAXLAYER_H -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEPermute.h" #include "arm_compute/runtime/Tensor.h" +#include namespace arm_compute { class ITensor; +class NELogits1DMaxKernel; +template +class NELogits1DSoftmaxKernel; +class NEFillBorderKernel; /** Basic function to compute a SoftmaxLayer and a Log SoftmaxLayer. * @@ -64,6 +67,8 @@ public: NESoftmaxLayerGeneric &operator=(const NESoftmaxLayerGeneric &) = delete; /** Default move assignment operator */ NESoftmaxLayerGeneric &operator=(NESoftmaxLayerGeneric &&) = default; + /** Default destructor */ + ~NESoftmaxLayerGeneric(); /** Set the input and output tensors. * * @param[in,out] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. 
If the width is not a @@ -91,17 +96,17 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEPermute _permute_input; - NEPermute _permute_output; - NELogits1DMaxKernel _max_kernel; - NELogits1DSoftmaxKernel _softmax_kernel; - NEFillBorderKernel _fill_border_kernel; - Tensor _max; - Tensor _tmp; - Tensor _input_permuted; - Tensor _output_permuted; - bool _needs_permute; + MemoryGroup _memory_group; + NEPermute _permute_input; + NEPermute _permute_output; + std::unique_ptr _max_kernel; + std::unique_ptr> _softmax_kernel; + std::unique_ptr _fill_border_kernel; + Tensor _max; + Tensor _tmp; + Tensor _input_permuted; + Tensor _output_permuted; + bool _needs_permute; }; using NESoftmaxLayer = NESoftmaxLayerGeneric; diff --git a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h index 6f339e8d52..6df06e87ec 100644 --- a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h +++ b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,13 +26,15 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" -#include "arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h" #include "arm_compute/core/Types.h" +#include namespace arm_compute { class ITensor; +class ITensorInfo; +class NESpaceToBatchLayerKernel; +class NEMemsetKernel; /** Basic function to spatial divide a tensor. This function calls the following NEON kernels/functions: * @@ -53,7 +55,7 @@ public: /** Allow instances of this class to be moved */ NESpaceToBatchLayer &operator=(NESpaceToBatchLayer &&) = default; /** Default destructor */ - virtual ~NESpaceToBatchLayer() = default; + ~NESpaceToBatchLayer(); /** Set the input and output tensors. * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. 
@@ -99,9 +101,9 @@ public: void run() override; private: - NESpaceToBatchLayerKernel _space_to_batch_kernel; /**< SpaceToBatch kernel to run */ - NEMemsetKernel _memset_kernel; /**< Memset kernel to run */ - bool _has_padding; /**< Flag to check if the output has padding */ + std::unique_ptr _space_to_batch_kernel; /**< SpaceToBatch kernel to run */ + std::unique_ptr _memset_kernel; /**< Memset kernel to run */ + bool _has_padding; /**< Flag to check if the output has padding */ }; } // namespace arm_compute #endif /* ARM_COMPUTE_NESPACETOBATCHLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h index 16a9c80d44..1e7aae215d 100644 --- a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h +++ b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,15 +24,16 @@ #ifndef ARM_COMPUTE_NESPACETODEPTHLAYER_H #define ARM_COMPUTE_NESPACETODEPTHLAYER_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" -#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h" -#include "arm_compute/core/Types.h" +#include namespace arm_compute { class ITensor; +class ITensorInfo; +class NESpaceToDepthLayerKernel; /** This function calls the following NEON kernels/functions: * @@ -52,7 +53,7 @@ public: /** Allow instances of this class to be moved */ NESpaceToDepthLayer &operator=(NESpaceToDepthLayer &&) = default; /** Default destructor */ - virtual ~NESpaceToDepthLayer() = default; + ~NESpaceToDepthLayer(); /** Set the input and output tensors. * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. 
@@ -74,7 +75,7 @@ public: void run() override; private: - NESpaceToDepthLayerKernel _space_to_depth_kernel; /**< SpaceToDepth kernel to run */ + std::unique_ptr _space_to_depth_kernel; /**< SpaceToDepth kernel to run */ }; } // namespace arm_compute #endif /* ARM_COMPUTE_NESPACETODEPTHLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEStackLayer.h b/arm_compute/runtime/NEON/functions/NEStackLayer.h index 4180b6da08..f6fa4f2eb3 100644 --- a/arm_compute/runtime/NEON/functions/NEStackLayer.h +++ b/arm_compute/runtime/NEON/functions/NEStackLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,14 +27,14 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEStackLayerKernel.h" - #include #include namespace arm_compute { class ITensor; +class ITensorInfo; +class NEStackLayerKernel; /** Basic function to stack tensors along an axis. This function calls the following kernel: * @@ -46,6 +46,16 @@ class NEStackLayer : public IFunction public: /** Default constructor */ NEStackLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEStackLayer(const NEStackLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEStackLayer &operator=(const NEStackLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEStackLayer(NEStackLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEStackLayer &operator=(NEStackLayer &&) = delete; + /** Default destructor */ + ~NEStackLayer(); /** Initialise the kernel's inputs vector and output. 
* * @note Supported input tensor rank: up to 4 @@ -73,9 +83,9 @@ public: void run() override; private: - std::vector _input; - std::vector _stack_kernels; - unsigned int _num_inputs; + std::vector _input; + std::vector> _stack_kernels; + unsigned int _num_inputs; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NESTACKLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NETableLookup.h b/arm_compute/runtime/NEON/functions/NETableLookup.h index fb08274761..03674cd297 100644 --- a/arm_compute/runtime/NEON/functions/NETableLookup.h +++ b/arm_compute/runtime/NEON/functions/NETableLookup.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEThreshold.h b/arm_compute/runtime/NEON/functions/NEThreshold.h index cb9b696769..0a9edfc7ca 100644 --- a/arm_compute/runtime/NEON/functions/NEThreshold.h +++ b/arm_compute/runtime/NEON/functions/NEThreshold.h @@ -34,6 +34,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NEThresholdKernel */ class NEThreshold : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NETile.h b/arm_compute/runtime/NEON/functions/NETile.h index 53a94db583..d5ce76c9cf 100644 --- a/arm_compute/runtime/NEON/functions/NETile.h +++ b/arm_compute/runtime/NEON/functions/NETile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -31,6 +31,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NETileKernel */ class NETile : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h index 1169459f0f..2651bdd727 100644 --- a/arm_compute/runtime/NEON/functions/NETranspose.h +++ b/arm_compute/runtime/NEON/functions/NETranspose.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to transpose a matrix on NEON. This function calls the following NEON kernel: * diff --git a/arm_compute/runtime/NEON/functions/NEUnstack.h b/arm_compute/runtime/NEON/functions/NEUnstack.h index 2e3a679664..c8e85115f7 100644 --- a/arm_compute/runtime/NEON/functions/NEUnstack.h +++ b/arm_compute/runtime/NEON/functions/NEUnstack.h @@ -45,6 +45,16 @@ class NEUnstack : public IFunction public: /** Default constructor */ NEUnstack(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEUnstack(const NEUnstack &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEUnstack &operator=(const NEUnstack &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEUnstack(NEUnstack &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEUnstack &operator=(NEUnstack &&) = delete; + /** Default destructor */ + ~NEUnstack() = default; /** Set the input, output and unstacking axis. * * @param[in] input A tensor to be unstacked. Data type supported: All. 
diff --git a/arm_compute/runtime/NEON/functions/NEUpsampleLayer.h b/arm_compute/runtime/NEON/functions/NEUpsampleLayer.h index f9145f1612..168845d203 100644 --- a/arm_compute/runtime/NEON/functions/NEUpsampleLayer.h +++ b/arm_compute/runtime/NEON/functions/NEUpsampleLayer.h @@ -24,15 +24,17 @@ #ifndef ARM_COMPUTE_NEUPSAMPLELAYER_H #define ARM_COMPUTE_NEUPSAMPLELAYER_H -#include "arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/Tensor.h" +#include + namespace arm_compute { class ITensor; +class NEUpsampleLayerKernel; /** Function to run upsample layer */ class NEUpsampleLayer : public IFunction @@ -40,6 +42,16 @@ class NEUpsampleLayer : public IFunction public: /** Constructor */ NEUpsampleLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEUpsampleLayer(const NEUpsampleLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEUpsampleLayer &operator=(const NEUpsampleLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEUpsampleLayer(NEUpsampleLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEUpsampleLayer &operator=(NEUpsampleLayer &&) = delete; + /** Default destructor */ + ~NEUpsampleLayer(); /** Set the input output tensors. * * @param[in] input Source tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. 
@@ -66,8 +78,8 @@ public: void run() override; private: - NEUpsampleLayerKernel _kernel; - DataLayout _data_layout; + std::unique_ptr<NEUpsampleLayerKernel> _kernel; + DataLayout _data_layout; }; } // arm_compute #endif /* ARM_COMPUTE_NEUPSAMPLELAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEWarpAffine.h b/arm_compute/runtime/NEON/functions/NEWarpAffine.h index eb7492b71f..6b9a2f4354 100644 --- a/arm_compute/runtime/NEON/functions/NEWarpAffine.h +++ b/arm_compute/runtime/NEON/functions/NEWarpAffine.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEWarpPerspective.h b/arm_compute/runtime/NEON/functions/NEWarpPerspective.h index c439e82db5..caa91db64e 100644 --- a/arm_compute/runtime/NEON/functions/NEWarpPerspective.h +++ b/arm_compute/runtime/NEON/functions/NEWarpPerspective.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h index 4090c8c409..6b61e7031b 100644 --- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h @@ -26,7 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CPP/functions/CPPPermute.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -41,6 +40,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class ICPPKernel; /** Basic function to simulate a convolution layer. 
This function calls the following NEON kernels: * -# @ref NEWinogradLayerTransformWeightsKernel (executed only once in the first call to the run() method ) @@ -56,6 +56,12 @@ class NEWinogradConvolutionLayer : public IFunction public: /** Constructor */ NEWinogradConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr); + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEWinogradConvolutionLayer(NEWinogradConvolutionLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEWinogradConvolutionLayer &operator=(NEWinogradConvolutionLayer &&) = delete; + /** Default destructor */ + ~NEWinogradConvolutionLayer() = default; /** Set the input and output tensors. * @@ -105,12 +111,12 @@ public: NEWinogradConvolutionLayer &operator=(const NEWinogradConvolutionLayer &) = delete; private: - MemoryGroup _memory_group; - NEGEMM _gemm_function; - std::unique_ptr<INEKernel> _transform_input_kernel; - std::unique_ptr<INEKernel> _transform_output_kernel; - std::unique_ptr<INEKernel> _transform_weights_kernel; - NEActivationLayer _activationlayer_function; + MemoryGroup _memory_group; + NEGEMM _gemm_function; + std::unique_ptr<ICPPKernel> _transform_input_kernel; + std::unique_ptr<ICPPKernel> _transform_output_kernel; + std::unique_ptr<ICPPKernel> _transform_weights_kernel; + NEActivationLayer _activationlayer_function; CPPPermute _permute_input; CPPPermute _permute_weights; diff --git a/arm_compute/runtime/NEON/functions/NEYOLOLayer.h b/arm_compute/runtime/NEON/functions/NEYOLOLayer.h index 88219602c1..4c9a5bf6e4 100644 --- a/arm_compute/runtime/NEON/functions/NEYOLOLayer.h +++ b/arm_compute/runtime/NEON/functions/NEYOLOLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,12 +26,12 @@ #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" -#include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h" #include "arm_compute/core/Types.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEYOLOLayerKernel */ class NEYOLOLayer : public INESimpleFunctionNoBorder diff --git a/docs/ComputeLibrary.dir b/docs/ComputeLibrary.dir index c325af29fd..93be52afd7 100644 --- a/docs/ComputeLibrary.dir +++ b/docs/ComputeLibrary.dir @@ -71,15 +71,15 @@ * @brief Folder containing all the GLES kernels */ -/** @dir arm_compute/core/NEON +/** @dir src/core/NEON * @brief NEON backend core: kernels and utilities. */ -/** @file arm_compute/core/NEON/NEKernels.h +/** @file src/core/NEON/NEKernels.h * @brief Includes all the NEON kernels at once */ -/** @dir arm_compute/core/NEON/kernels +/** @dir src/core/NEON/kernels * @brief Folder containing all the NEON kernels */ diff --git a/examples/neon_cartoon_effect.cpp b/examples/neon_cartoon_effect.cpp index dd33885add..24a689bee9 100644 --- a/examples/neon_cartoon_effect.cpp +++ b/examples/neon_cartoon_effect.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp b/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp index d7af9c9e7a..a134e3e5c1 100644 --- a/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp +++ b/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp @@ -84,14 +84,14 @@ void CPPCornerCandidatesKernel::configure(const IImage *input, InternalKeypoint update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration)); - INEKernel::configure(win); + ICPPKernel::configure(win); } void CPPCornerCandidatesKernel::run(const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window); Iterator input(_input, window); execute_window_loop(window, [&](const Coordinates & id) diff --git a/src/core/NEON/INEKernel.h b/src/core/NEON/INEKernel.h new file mode 100644 index 0000000000..7ad20166d8 --- /dev/null +++ b/src/core/NEON/INEKernel.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_INEKERNEL_H +#define ARM_COMPUTE_INEKERNEL_H + +#include "arm_compute/core/CPP/ICPPKernel.h" + +namespace arm_compute +{ +/** Common interface for all kernels implemented in NEON. */ +using INEKernel = ICPPKernel; +} // namespace arm_compute +#endif /*ARM_COMPUTE_INEKERNEL_H */ diff --git a/src/core/NEON/INESimpleKernel.h b/src/core/NEON/INESimpleKernel.h new file mode 100644 index 0000000000..da32d6619e --- /dev/null +++ b/src/core/NEON/INESimpleKernel.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_INESIMPLEKERNEL_H +#define ARM_COMPUTE_INESIMPLEKERNEL_H + +#include "arm_compute/core/CPP/ICPPSimpleKernel.h" + +namespace arm_compute +{ +/** Interface for simple NEON kernels having 1 tensor input and 1 tensor output */ +using INESimpleKernel = ICPPSimpleKernel; +} // namespace arm_compute +#endif /*ARM_COMPUTE_INESIMPLEKERNEL_H */ diff --git a/src/core/NEON/NEKernels.h b/src/core/NEON/NEKernels.h new file mode 100644 index 0000000000..c1924d6739 --- /dev/null +++ b/src/core/NEON/NEKernels.h @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEKERNELS_H +#define ARM_COMPUTE_NEKERNELS_H + +/* Header regrouping all the NEON kernels */ +#include "src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h" +#include "src/core/NEON/kernels/NEAccumulateKernel.h" +#include "src/core/NEON/kernels/NEActivationLayerKernel.h" +#include "src/core/NEON/kernels/NEArithmeticAdditionKernel.h" +#include "src/core/NEON/kernels/NEArithmeticSubtractionKernel.h" +#include "src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" +#include "src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h" +#include "src/core/NEON/kernels/NEBitwiseAndKernel.h" +#include "src/core/NEON/kernels/NEBitwiseNotKernel.h" +#include "src/core/NEON/kernels/NEBitwiseOrKernel.h" +#include "src/core/NEON/kernels/NEBitwiseXorKernel.h" +#include "src/core/NEON/kernels/NEBoundingBoxTransformKernel.h" +#include "src/core/NEON/kernels/NEBox3x3Kernel.h" +#include "src/core/NEON/kernels/NECannyEdgeKernel.h" +#include "src/core/NEON/kernels/NEChannelCombineKernel.h" +#include "src/core/NEON/kernels/NEChannelExtractKernel.h" +#include "src/core/NEON/kernels/NEChannelShuffleLayerKernel.h" +#include "src/core/NEON/kernels/NECol2ImKernel.h" +#include "src/core/NEON/kernels/NEColorConvertKernel.h" +#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" +#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "src/core/NEON/kernels/NEConvolutionKernel.h" +#include "src/core/NEON/kernels/NECopyKernel.h" +#include "src/core/NEON/kernels/NECropKernel.h" +#include "src/core/NEON/kernels/NECumulativeDistributionKernel.h" +#include "src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h" +#include "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h" +#include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h" +#include "src/core/NEON/kernels/NEDequantizationLayerKernel.h" 
+#include "src/core/NEON/kernels/NEDerivativeKernel.h" +#include "src/core/NEON/kernels/NEDilateKernel.h" +#include "src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" +#include "src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" +#include "src/core/NEON/kernels/NEElementwiseOperationKernel.h" +#include "src/core/NEON/kernels/NEElementwiseUnaryKernel.h" +#include "src/core/NEON/kernels/NEErodeKernel.h" +#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h" +#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h" +#include "src/core/NEON/kernels/NEFFTScaleKernel.h" +#include "src/core/NEON/kernels/NEFastCornersKernel.h" +#include "src/core/NEON/kernels/NEFillArrayKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEFlattenLayerKernel.h" +#include "src/core/NEON/kernels/NEFloorKernel.h" +#include "src/core/NEON/kernels/NEFuseBatchNormalizationKernel.h" +#include "src/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "src/core/NEON/kernels/NEGatherKernel.h" +#include "src/core/NEON/kernels/NEGaussian3x3Kernel.h" 
+#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h" +#include "src/core/NEON/kernels/NEGaussianPyramidKernel.h" +#include "src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" +#include "src/core/NEON/kernels/NEHOGDescriptorKernel.h" +#include "src/core/NEON/kernels/NEHOGDetectorKernel.h" +#include "src/core/NEON/kernels/NEHarrisCornersKernel.h" +#include "src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEHistogramKernel.h" +#include "src/core/NEON/kernels/NEIm2ColKernel.h" +#include "src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h" +#include "src/core/NEON/kernels/NEIntegralImageKernel.h" +#include "src/core/NEON/kernels/NEL2NormalizeLayerKernel.h" +#include "src/core/NEON/kernels/NELKTrackerKernel.h" +#include "src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h" +#include "src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h" +#include "src/core/NEON/kernels/NEMeanStdDevKernel.h" +#include "src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h" +#include "src/core/NEON/kernels/NEMedian3x3Kernel.h" +#include "src/core/NEON/kernels/NEMemsetKernel.h" +#include "src/core/NEON/kernels/NEMinMaxLayerKernel.h" +#include "src/core/NEON/kernels/NEMinMaxLocationKernel.h" +#include "src/core/NEON/kernels/NENonLinearFilterKernel.h" +#include "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" +#include "src/core/NEON/kernels/NENormalizationLayerKernel.h" +#include "src/core/NEON/kernels/NEPadLayerKernel.h" +#include "src/core/NEON/kernels/NEPermuteKernel.h" +#include "src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" +#include "src/core/NEON/kernels/NEPoolingLayerKernel.h" +#include "src/core/NEON/kernels/NEPriorBoxLayerKernel.h" +#include "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h" +#include "src/core/NEON/kernels/NEQuantizationLayerKernel.h" +#include "src/core/NEON/kernels/NEROIAlignLayerKernel.h" +#include 
"src/core/NEON/kernels/NEROIPoolingLayerKernel.h" +#include "src/core/NEON/kernels/NERangeKernel.h" +#include "src/core/NEON/kernels/NEReductionOperationKernel.h" +#include "src/core/NEON/kernels/NERemapKernel.h" +#include "src/core/NEON/kernels/NEReorgLayerKernel.h" +#include "src/core/NEON/kernels/NEReshapeLayerKernel.h" +#include "src/core/NEON/kernels/NEReverseKernel.h" +#include "src/core/NEON/kernels/NEScaleKernel.h" +#include "src/core/NEON/kernels/NEScharr3x3Kernel.h" +#include "src/core/NEON/kernels/NESelectKernel.h" +#include "src/core/NEON/kernels/NESobel3x3Kernel.h" +#include "src/core/NEON/kernels/NESobel5x5Kernel.h" +#include "src/core/NEON/kernels/NESobel7x7Kernel.h" +#include "src/core/NEON/kernels/NESoftmaxLayerKernel.h" +#include "src/core/NEON/kernels/NESpaceToBatchLayerKernel.h" +#include "src/core/NEON/kernels/NESpaceToDepthLayerKernel.h" +#include "src/core/NEON/kernels/NEStackLayerKernel.h" +#include "src/core/NEON/kernels/NEStridedSliceKernel.h" +#include "src/core/NEON/kernels/NETableLookupKernel.h" +#include "src/core/NEON/kernels/NEThresholdKernel.h" +#include "src/core/NEON/kernels/NETileKernel.h" +#include "src/core/NEON/kernels/NETransposeKernel.h" +#include "src/core/NEON/kernels/NEUpsampleLayerKernel.h" +#include "src/core/NEON/kernels/NEWarpKernel.h" +#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h" +#include "src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEYOLOLayerKernel.h" + +#endif /* ARM_COMPUTE_NEKERNELS_H */ diff --git a/src/core/NEON/NETracePoint.cpp b/src/core/NEON/NETracePoint.cpp index 4a6bffa54e..bf48b411ec 100644 --- a/src/core/NEON/NETracePoint.cpp +++ b/src/core/NEON/NETracePoint.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/core/TracePoint.h" -#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" +#include "src/core/NEON/kernels/NELKTrackerKernel.h" #include "src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h" #include 
"src/core/NEON/kernels/convolution/common/convolution.hpp" #include "utils/TypePrinter.h" diff --git a/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp b/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp index acea0af02d..a6a41b8af9 100644 --- a/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp +++ b/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h" +#include "src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h b/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h new file mode 100644 index 0000000000..cc95172f35 --- /dev/null +++ b/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H +#define ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the absolute difference kernel + * + * Absolute difference is computed by: + * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f] + */ +class NEAbsoluteDifferenceKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEAbsoluteDifferenceKernel"; + } + /** Default constructor */ + NEAbsoluteDifferenceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAbsoluteDifferenceKernel(const NEAbsoluteDifferenceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAbsoluteDifferenceKernel &operator=(const NEAbsoluteDifferenceKernel &) = delete; + /** Allow instances of this class to be moved */ + NEAbsoluteDifferenceKernel(NEAbsoluteDifferenceKernel &&) = default; + /** Allow instances of this class to be moved */ + NEAbsoluteDifferenceKernel &operator=(NEAbsoluteDifferenceKernel &&) = default; + /** Default destructor */ + ~NEAbsoluteDifferenceKernel() = default; + + /** Set the inputs and output tensors + * + * @param[in] input1 Source tensor. Data types supported: U8/S16 + * @param[in] input2 Source tensor. 
Data types supported: U8/S16 + * @param[out] output Destination tensor, Data types supported: U8/S16 + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialised absolute difference functions + * + * @param[in] input1 An input tensor. Data types supported: U8/S16. + * @param[in] input2 An input tensor. Data types supported: U8/S16. + * @param[out] output The output tensor, Data types supported: U8 (Only if both inputs are U8), S16. + * @param[in] window Region on which to execute the kernel. + */ + using AbsDiffFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); + + /** Absolute difference function to use for the particular tensor formats passed to configure() */ + AbsDiffFunction *_func; + const ITensor *_input1; + const ITensor *_input2; + ITensor *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEAccumulateKernel.cpp b/src/core/NEON/kernels/NEAccumulateKernel.cpp index 73ef7eb66f..46179cadcb 100644 --- a/src/core/NEON/kernels/NEAccumulateKernel.cpp +++ b/src/core/NEON/kernels/NEAccumulateKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEAccumulateKernel.h" +#include "src/core/NEON/kernels/NEAccumulateKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" @@ -33,13 +33,8 @@ #include -using namespace arm_compute; - namespace arm_compute { -class Coordinates; -} // namespace arm_compute - /* Max S16 value used for saturation purposes. 
*/ const static uint16x8_t max_int_u16 = vdupq_n_u16(static_cast<uint16_t>(INT16_MAX)); @@ -361,3 +356,4 @@ void NEAccumulateSquaredKernel::run(const Window &window, const ThreadInfo &info }, input, accum); } +} // namespace arm_compute \ No newline at end of file diff --git a/src/core/NEON/kernels/NEAccumulateKernel.h b/src/core/NEON/kernels/NEAccumulateKernel.h new file mode 100644 index 0000000000..af1298f53f --- /dev/null +++ b/src/core/NEON/kernels/NEAccumulateKernel.h @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEACCUMULATEKERNEL_H +#define ARM_COMPUTE_NEACCUMULATEKERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Interface for the accumulate kernel + * + * Accumulation is computed by: + * @f[ accum(x,y) = accum(x,y) + input(x,y) @f] + */ +class NEAccumulateKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEAccumulateKernel"; + } + /** Default constructor */ + NEAccumulateKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateKernel(const NEAccumulateKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateKernel &operator=(const NEAccumulateKernel &) = delete; + /** Allow instances of this class to be moved */ + NEAccumulateKernel(NEAccumulateKernel &&) = default; + /** Allow instances of this class to be moved */ + NEAccumulateKernel &operator=(NEAccumulateKernel &&) = default; + /** Default destructor */ + ~NEAccumulateKernel() = default; + /** Set the input and accumulation tensors + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] accum Destination tensor. Data type supported: S16. 
+ */ + void configure(const ITensor *input, ITensor *accum); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; +}; + +/** Interface for the accumulate weighted kernel + * + * Weighted accumulation is computed: + * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f] + * + * Where @f$ 0 \le \alpha \le 1 @f$ + * Conceptually, the rounding for this is defined as: + * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f] +*/ +class NEAccumulateWeightedKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEAccumulateWeightedKernel"; + } + /** Default constructor */ + NEAccumulateWeightedKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateWeightedKernel(const NEAccumulateWeightedKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateWeightedKernel &operator=(const NEAccumulateWeightedKernel &) = delete; + /** Allow instances of this class to be moved */ + NEAccumulateWeightedKernel(NEAccumulateWeightedKernel &&) = default; + /** Allow instances of this class to be moved */ + NEAccumulateWeightedKernel &operator=(NEAccumulateWeightedKernel &&) = default; + /** Default destructor */ + ~NEAccumulateWeightedKernel() = default; + /** Set the input and accumulation tensors, and the scale value + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[in] alpha Scalar value in the range [0.0f, 1.0f] + * @param[in,out] accum Accumulated tensor. Data type supported: U8. 
+ */ + void configure(const ITensor *input, float alpha, ITensor *accum); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +protected: + float _alpha; +}; + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +/** Interface for the accumulate weighted kernel using F16 */ +class NEAccumulateWeightedFP16Kernel : public NEAccumulateWeightedKernel +{ +public: + const char *name() const override + { + return "NEAccumulateWeightedFP16Kernel"; + } + /** Default constructor */ + NEAccumulateWeightedFP16Kernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateWeightedFP16Kernel(const NEAccumulateWeightedFP16Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateWeightedFP16Kernel &operator=(const NEAccumulateWeightedFP16Kernel &) = delete; + /** Allow instances of this class to be moved */ + NEAccumulateWeightedFP16Kernel(NEAccumulateWeightedFP16Kernel &&) = default; + /** Allow instances of this class to be moved */ + NEAccumulateWeightedFP16Kernel &operator=(NEAccumulateWeightedFP16Kernel &&) = default; + /** Default destructor */ + ~NEAccumulateWeightedFP16Kernel() = default; + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; +}; +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +/** Interface for the accumulate weighted kernel using F16 */ +using NEAccumulateWeightedFP16Kernel = NEAccumulateWeightedKernel; +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + +/** Interface for the accumulate squared kernel + * + * The accumulation of squares is computed: + * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f] + * + * Where @f$ 0 \le shift \le 15 @f$ +*/ +class NEAccumulateSquaredKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return 
"NEAccumulateSquaredKernel"; + } + /** Default constructor */ + NEAccumulateSquaredKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateSquaredKernel(const NEAccumulateSquaredKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateSquaredKernel &operator=(const NEAccumulateSquaredKernel &) = delete; + /** Allow instances of this class to be moved */ + NEAccumulateSquaredKernel(NEAccumulateSquaredKernel &&) = default; + /** Allow instances of this class to be moved */ + NEAccumulateSquaredKernel &operator=(NEAccumulateSquaredKernel &&) = default; + /** Default destructor */ + ~NEAccumulateSquaredKernel() = default; + /** Set the input and accumulation tensors and the shift value. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[in] shift Shift value in the range of [0, 15] + * @param[in,out] accum Accumulated tensor. Data type supported: S16. + */ + void configure(const ITensor *input, uint32_t shift, ITensor *accum); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + uint32_t _shift; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEACCUMULATEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp index f61f048a87..51257cb96b 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" +#include "src/core/NEON/kernels/NEActivationLayerKernel.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.h b/src/core/NEON/kernels/NEActivationLayerKernel.h new file mode 100644 index 0000000000..783783c6ab --- /dev/null +++ b/src/core/NEON/kernels/NEActivationLayerKernel.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H +#define ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H + +#include "arm_compute/core/utils/misc/Traits.h" +#include "src/core/NEON/INEKernel.h" + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#include <arm_fp16.h> +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the activation layer kernel. */ +class NEActivationLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEActivationLayerKernel"; + } + /** Constructor */ + NEActivationLayerKernel(); + /** Prevent instances of this class from being copy-constructed (As this class contains pointers) */ + NEActivationLayerKernel(const NEActivationLayerKernel &) = delete; + /** Default move constructor */ + NEActivationLayerKernel(NEActivationLayerKernel &&) = default; + /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */ + NEActivationLayerKernel &operator=(const NEActivationLayerKernel &) = delete; + /** Default move assignment operator */ + NEActivationLayerKernel &operator=(NEActivationLayerKernel &&) = default; + /** Default destructor */ + ~NEActivationLayerKernel() = default; + /** Set the input and output tensor info. + * + * @note If the output tensor info is a nullptr, the activation function will be performed in-place + * + * @param[in, out] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result + * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. + * @param[out] output Destination tensor info. Data type supported: same as @p input + * @param[in] activation_info Activation layer information. + */ + void configure(const ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo activation_info); + /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayerKernel + * + * @param[in] input Source tensor info. 
In case of @p output tensor info = nullptr, this tensor will store the result + * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. + * @param[in] output Destination tensor info. Data type supported: same as @p input + * @param[in] act_info Activation layer information. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + +private: + ActivationLayerInfo _act_info; /**< Activation layer information; presumably the one passed to configure() - TODO confirm */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp index 7f1a35fb55..fa26b903f1 100644 --- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" +#include "src/core/NEON/kernels/NEArithmeticAdditionKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.h b/src/core/NEON/kernels/NEArithmeticAdditionKernel.h new file mode 100644 index 0000000000..2072ad91bd --- /dev/null +++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H +#define ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform addition between two tensors */ +class NEArithmeticAdditionKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEArithmeticAdditionKernel"; + } + /** Default constructor */ + NEArithmeticAdditionKernel(); + /** Prevent instances of this class from being copy-constructed (As this class contains pointers) */ + NEArithmeticAdditionKernel(const NEArithmeticAdditionKernel &) = delete; + /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */ + NEArithmeticAdditionKernel &operator=(const NEArithmeticAdditionKernel &) = delete; + /** Allow instances of this class to be move-constructed */ + NEArithmeticAdditionKernel(NEArithmeticAdditionKernel &&) = default; + /** Allow instances of this class to be move-assigned */ + NEArithmeticAdditionKernel &operator=(NEArithmeticAdditionKernel &&) = default; + /** Default destructor */ + ~NEArithmeticAdditionKernel() = default; + + /** Initialise the kernel's inputs, output and overflow policy. + * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (S16,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,S16) -> S16 + * - (S32,S32) -> S32 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * - (QASYMM8,QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (QSYMM16,QSYMM16) -> QSYMM16 + * + * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 + * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 + * @param[out] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. 
+ * @param[in] policy Overflow policy. + */ + void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy); + /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAdditionKernel + * + * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 + * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 + * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. + * @param[in] policy Overflow policy. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialised add functions + * NOTE(review): QASYMM8_SIGNED is listed for configure() but not in the type lists below - confirm whether they are complete. + * @param[in] input1 First input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32 + * @param[in] input2 Second input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32 + * @param[out] output The output tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32. + * @param[in] policy Overflow policy. + * @param[in] window Region on which to execute the kernel. 
+ */ + using AddFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const Window &window); + /** Add function to use for the particular tensor types passed to configure() */ + AddFunction *_func; + ConvertPolicy _policy; /**< Overflow policy; presumably the one passed to configure() - TODO confirm */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp index 49e503fac4..bdd356ad7f 100644 --- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h" +#include "src/core/NEON/kernels/NEArithmeticSubtractionKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.h b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.h new file mode 100644 index 0000000000..69952d6162 --- /dev/null +++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H +#define ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform subtraction between two tensors */ +class NEArithmeticSubtractionKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEArithmeticSubtractionKernel"; + } + /** Default constructor */ + NEArithmeticSubtractionKernel(); + /** Prevent instances of this class from being copy-constructed (As this class contains pointers) */ + NEArithmeticSubtractionKernel(const NEArithmeticSubtractionKernel &) = delete; + /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */ + NEArithmeticSubtractionKernel &operator=(const NEArithmeticSubtractionKernel &) = delete; + /** Allow instances of this class to be move-constructed */ + NEArithmeticSubtractionKernel(NEArithmeticSubtractionKernel &&) = default; + /** Allow instances of this class to be move-assigned */ + NEArithmeticSubtractionKernel &operator=(NEArithmeticSubtractionKernel &&) = default; + /** Default destructor */ + ~NEArithmeticSubtractionKernel() = default; + + /** Initialise the kernel's inputs, output and overflow policy. 
+ * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (QASYMM8, QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED, QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (S16,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,S16) -> S16 + * - (S32,S32) -> S32 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * NOTE(review): QSYMM16 is listed under the supported data types below but not in the configurations above - confirm. + * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 + * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 + * @param[out] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32. + * @param[in] policy Overflow policy. Convert policy cannot be WRAP if datatype is quantized. + */ + void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy); + /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtractionKernel + * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (QASYMM8, QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED, QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (S16,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,S16) -> S16 + * - (S32,S32) -> S32 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * + * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 + * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 + * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32. + * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialised sub functions + * + * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 + * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 + * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32. + * @param[in] window Region on which to execute the kernel. + * @param[in] is_sat Flag to indicate if the policy is SATURATE. + */ + using SubFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window, bool is_sat); + /** Sub function to use for the particular tensor types passed to configure() */ + SubFunction *_func; + ConvertPolicy _policy; /**< Overflow policy; presumably the one passed to configure() - TODO confirm */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp index 65ac996f46..ddf69710f9 100644 --- a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h new file mode 100644 index 0000000000..b74a94805d --- /dev/null +++ b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H +#define ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the batch concatenate kernel. + * The input tensor will be concatenated into the output tensor. 
+ */ +class NEBatchConcatenateLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEBatchConcatenateLayerKernel"; + } + /** Default constructor */ + NEBatchConcatenateLayerKernel(); + /** Prevent instances of this class from being copy-constructed (As this class contains pointers) */ + NEBatchConcatenateLayerKernel(const NEBatchConcatenateLayerKernel &) = delete; + /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */ + NEBatchConcatenateLayerKernel &operator=(const NEBatchConcatenateLayerKernel &) = delete; + /** Allow instances of this class to be move-constructed */ + NEBatchConcatenateLayerKernel(NEBatchConcatenateLayerKernel &&) = default; + /** Allow instances of this class to be move-assigned */ + NEBatchConcatenateLayerKernel &operator=(NEBatchConcatenateLayerKernel &&) = default; + /** Default destructor */ + ~NEBatchConcatenateLayerKernel() = default; + /** Initialise the kernel's input, batch offset and output + * + * @param[in] input Input tensor info. Data types supported: All. + * @param[in] batch_offset The offset on axis # 3. + * @param[in,out] output Output tensor info. Data types supported: Same as @p input. + * + * @note: The output tensor's low two dimensions can't be smaller than the input one's. + * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. + * + */ + void configure(const ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEBatchConcatenateLayerKernel + * + * @param[in] input Input tensor info. Data types supported: All. + * @param[in] batch_offset The offset on axis # 3. + * @param[in] output Output tensor info. Data types supported: Same as @p input. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + +private: + using BatchConcatFunction = void(const ITensor *in, ITensor *out, unsigned int batch_offset, const Window &window); /**< Common signature of the batch concatenate functions */ + +private: + BatchConcatFunction *_func; /**< Batch concatenate function to run; presumably selected in configure() - TODO confirm */ + unsigned int _batch_offset; /**< Offset on axis # 3; presumably the value passed to configure() - TODO confirm */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp index bda396662f..afb08e5d1c 100644 --- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" +#include "src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h new file mode 100644 index 0000000000..9312073ce8 --- /dev/null +++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H +#define ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the batch normalization layer kernel. 
+ */ +class NEBatchNormalizationLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEBatchNormalizationLayerKernel"; + } + /** Default constructor */ + NEBatchNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchNormalizationLayerKernel(const NEBatchNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchNormalizationLayerKernel &operator=(const NEBatchNormalizationLayerKernel &) = delete; + /** Default Move Constructor. */ + NEBatchNormalizationLayerKernel(NEBatchNormalizationLayerKernel &&) = default; + /** Default move assignment operator */ + NEBatchNormalizationLayerKernel &operator=(NEBatchNormalizationLayerKernel &&) = default; + /** Default destructor */ + ~NEBatchNormalizationLayerKernel() = default; + /** Set the input and output tensors. + * + * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place + * + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. + * 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F16/F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. 
Data types supported: Same as @p input + * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input + * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. + */ + void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta = nullptr, const ITensor *gamma = nullptr, float epsilon = 0.001f, + ActivationLayerInfo act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref NEBatchNormalizationLayerKernel + * + * @param[in] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result. + * 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F16/F32. + * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input + * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. 
Data types supported: Same as @p input + * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const ITensorInfo *mean, const ITensorInfo *var, + const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr, + float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Configure execution function in case of non-fused activation **/ + void configure_non_fused(); + /** Configure execution function in case of fused activation **/ + void configure_fused(); + + /** Template function to run batch normalization on fp32 + * + * @tparam T Specialization data type + * @tparam fused_activation Boolean that flags if its a fused activation or not + * @tparam F Activation function functor to run + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void batch_normalization_nchw(const Window &window); + /** Template function to run batch normalization on fp32 on tensors with NHWC format + * + * @tparam T Specialization data type + * @tparam fused_activation Boolean that flags if its a fused activation or not + * @tparam F Activation function functor to run + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void batch_normalization_nhwc(const Window &window); + /** Common signature for all the batch normalization functions + * + * @param[in] window Region on which to execute the kernel. 
+ */ + using BatchNormFunctionPtr = void (NEBatchNormalizationLayerKernel::*)(const Window &window); + +private: + BatchNormFunctionPtr _func; + ITensor *_input; + ITensor *_output; + const ITensor *_mean; + const ITensor *_var; + const ITensor *_gamma; + const ITensor *_beta; + float _epsilon; + ActivationLayerInfo _act_info; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp b/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp index e24d7b6c0a..10207b9cf6 100644 --- a/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h" +#include "src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h b/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h new file mode 100644 index 0000000000..26e8224922 --- /dev/null +++ b/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H +#define ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the batch to space kernel */ +class NEBatchToSpaceLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEBatchToSpaceLayerKernel"; + } + /** Default constructor */ + NEBatchToSpaceLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchToSpaceLayerKernel(const NEBatchToSpaceLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchToSpaceLayerKernel &operator=(const NEBatchToSpaceLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBatchToSpaceLayerKernel(NEBatchToSpaceLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBatchToSpaceLayerKernel &operator=(NEBatchToSpaceLayerKernel &&) = default; + /** Default destructor */ + ~NEBatchToSpaceLayerKernel() = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32. + * @param[out] output Tensor output.
Data types supported: same as @p input. + */ + void configure(const ITensor *input, const ITensor *block_shape, ITensor *output); + /** Initialise the kernel's inputs and output (Static block shape). + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. + * @param[out] output Tensor output. Data types supported: same as @p input. + */ + void configure(const ITensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayerKernel + * + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor info with shape [M]. Data types supported: S32. + * @param[in] output Tensor output info. Data types supported: same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayerKernel (Static block shape). + * + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. + * @param[in] output Tensor output info.
Data types supported: same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const int32_t block_shape_x, const int32_t block_shape_y, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; /**< Source tensor */ + const ITensor *_block_shape; /**< Block shape tensor */ + ITensor *_output; /**< Destination tensor */ + DataLayout _data_layout; /**< Data layout to be used at run-time */ + + int32_t _block_shape_x; /**< Block shape x value; presumably the one passed to the static block shape configure() - confirm in the .cpp */ + int32_t _block_shape_y; /**< Block shape y value; presumably the one passed to the static block shape configure() - confirm in the .cpp */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEBitwiseAndKernel.cpp b/src/core/NEON/kernels/NEBitwiseAndKernel.cpp index 2d49ff825e..4f4de70c3c 100644 --- a/src/core/NEON/kernels/NEBitwiseAndKernel.cpp +++ b/src/core/NEON/kernels/NEBitwiseAndKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h" +#include "src/core/NEON/kernels/NEBitwiseAndKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEBitwiseAndKernel.h b/src/core/NEON/kernels/NEBitwiseAndKernel.h new file mode 100644 index 0000000000..e4603f68f6 --- /dev/null +++ b/src/core/NEON/kernels/NEBitwiseAndKernel.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEBITWISEANDKERNEL_H +#define ARM_COMPUTE_NEBITWISEANDKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform bitwise AND between XY-planes of two tensors + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \land input2(x,y) @f] + */ +class NEBitwiseAndKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEBitwiseAndKernel"; + } + /** Default constructor */ + NEBitwiseAndKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseAndKernel(const NEBitwiseAndKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseAndKernel &operator=(const NEBitwiseAndKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBitwiseAndKernel(NEBitwiseAndKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBitwiseAndKernel &operator=(NEBitwiseAndKernel &&) = default; + /** Default destructor */ + ~NEBitwiseAndKernel() = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input1 First input tensor. Data type supported: U8. + * @param[in] input2 Second input tensor. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8.
+ */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input1; /**< Source tensor 1 */ + const ITensor *_input2; /**< Source tensor 2 */ + ITensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEBITWISEANDKERNEL_H */ diff --git a/src/core/NEON/kernels/NEBitwiseNotKernel.cpp b/src/core/NEON/kernels/NEBitwiseNotKernel.cpp index eed9b273ae..c69c4ea046 100644 --- a/src/core/NEON/kernels/NEBitwiseNotKernel.cpp +++ b/src/core/NEON/kernels/NEBitwiseNotKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h" +#include "src/core/NEON/kernels/NEBitwiseNotKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEBitwiseNotKernel.h b/src/core/NEON/kernels/NEBitwiseNotKernel.h new file mode 100644 index 0000000000..ba47c38143 --- /dev/null +++ b/src/core/NEON/kernels/NEBitwiseNotKernel.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEBITWISENOTKERNEL_H +#define ARM_COMPUTE_NEBITWISENOTKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform a bitwise NOT operation on a tensor + * + * Result is computed by: + * @f[ output(x,y) = \lnot input(x,y) @f] + */ +class NEBitwiseNotKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEBitwiseNotKernel"; + } + /** Default constructor */ + NEBitwiseNotKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseNotKernel(const NEBitwiseNotKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseNotKernel &operator=(const NEBitwiseNotKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBitwiseNotKernel(NEBitwiseNotKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBitwiseNotKernel &operator=(NEBitwiseNotKernel &&) = default; + /** Default destructor */ + ~NEBitwiseNotKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] input The input tensor. Data type supported: U8. + * @param[out] output The output tensor. Data type supported: U8.
+ */ + void configure(const ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; /**< Source tensor */ + ITensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEBITWISENOTKERNEL_H */ diff --git a/src/core/NEON/kernels/NEBitwiseOrKernel.cpp b/src/core/NEON/kernels/NEBitwiseOrKernel.cpp index f96117e860..875e6391a5 100644 --- a/src/core/NEON/kernels/NEBitwiseOrKernel.cpp +++ b/src/core/NEON/kernels/NEBitwiseOrKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h" +#include "src/core/NEON/kernels/NEBitwiseOrKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEBitwiseOrKernel.h b/src/core/NEON/kernels/NEBitwiseOrKernel.h new file mode 100644 index 0000000000..40ef757d60 --- /dev/null +++ b/src/core/NEON/kernels/NEBitwiseOrKernel.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEBITWISEORKERNEL_H +#define ARM_COMPUTE_NEBITWISEORKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform bitwise inclusive OR between two tensors + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \lor input2(x,y) @f] + */ +class NEBitwiseOrKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEBitwiseOrKernel"; + } + /** Default constructor */ + NEBitwiseOrKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseOrKernel(const NEBitwiseOrKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseOrKernel &operator=(const NEBitwiseOrKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBitwiseOrKernel(NEBitwiseOrKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBitwiseOrKernel &operator=(NEBitwiseOrKernel &&) = default; + /** Default destructor */ + ~NEBitwiseOrKernel() = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input1 First input tensor. Data type supported: U8. + * @param[in] input2 Second input tensor. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8.
+ */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input1; /**< Source tensor 1 */ + const ITensor *_input2; /**< Source tensor 2 */ + ITensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEBITWISEORKERNEL_H */ diff --git a/src/core/NEON/kernels/NEBitwiseXorKernel.cpp b/src/core/NEON/kernels/NEBitwiseXorKernel.cpp index 45d2b0a0db..603b49d5eb 100644 --- a/src/core/NEON/kernels/NEBitwiseXorKernel.cpp +++ b/src/core/NEON/kernels/NEBitwiseXorKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h" +#include "src/core/NEON/kernels/NEBitwiseXorKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEBitwiseXorKernel.h b/src/core/NEON/kernels/NEBitwiseXorKernel.h new file mode 100644 index 0000000000..24d07a6e18 --- /dev/null +++ b/src/core/NEON/kernels/NEBitwiseXorKernel.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEBITWISEXORKERNEL_H +#define ARM_COMPUTE_NEBITWISEXORKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform bitwise exclusive OR (XOR) between two tensors + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \oplus input2(x,y) @f] + */ +class NEBitwiseXorKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEBitwiseXorKernel"; + } + /** Default constructor */ + NEBitwiseXorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseXorKernel(const NEBitwiseXorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseXorKernel &operator=(const NEBitwiseXorKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBitwiseXorKernel(NEBitwiseXorKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBitwiseXorKernel &operator=(NEBitwiseXorKernel &&) = default; + /** Default destructor */ + ~NEBitwiseXorKernel() = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input1 First input tensor. Data type supported: U8. + * @param[in] input2 Second input tensor. Data type supported: U8. + * @param[out] output The output tensor. Data type supported: U8.
+ */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input1; /**< Source tensor 1 */ + const ITensor *_input2; /**< Source tensor 2 */ + ITensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEBITWISEXORKERNEL_H */ diff --git a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp index 5a18e88321..03d6e1c600 100644 --- a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp +++ b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h" +#include "src/core/NEON/kernels/NEBoundingBoxTransformKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" diff --git a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.h b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.h new file mode 100644 index 0000000000..c080ce6a5c --- /dev/null +++ b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H +#define ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the bounding box transform kernel */ +class NEBoundingBoxTransformKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEBoundingBoxTransformKernel"; + } + + /** Default constructor */ + NEBoundingBoxTransformKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBoundingBoxTransformKernel(const NEBoundingBoxTransformKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBoundingBoxTransformKernel &operator=(const NEBoundingBoxTransformKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBoundingBoxTransformKernel(NEBoundingBoxTransformKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBoundingBoxTransformKernel &operator=(NEBoundingBoxTransformKernel &&) = default; + /** Default destructor */ + ~NEBoundingBoxTransformKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size (M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. + * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2].
Data types supported: Same as @p boxes + * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. + * Data types supported: QASYMM8 if @p boxes is QASYMM16, otherwise same as @p boxes. + * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. + * + * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. + * + */ + void configure(const ITensor *boxes, ITensor *pred_boxes, const ITensor *deltas, const BoundingBoxTransformInfo &info); + + /** Static function to check if given info will lead to a valid configuration of @ref NEBoundingBoxTransformKernel + * + * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size (M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. + * @param[in] pred_boxes Destination tensor info. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p boxes + * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. + * Data types supported: QASYMM8 if @p boxes is QASYMM16, otherwise same as @p boxes. + * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. + * + * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct.
+ * + * @return a Status + */ + static Status validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + template <typename T> + void internal_run(const Window &window); + + const ITensor *_boxes; /**< Source tensor (bounding box proposals) */ + ITensor *_pred_boxes; /**< Destination tensor (transformed bounding boxes) */ + const ITensor *_deltas; /**< Bounding box translations and scales */ + BoundingBoxTransformInfo _bbinfo; /**< Bounding box transform information */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H */ diff --git a/src/core/NEON/kernels/NEBox3x3Kernel.cpp b/src/core/NEON/kernels/NEBox3x3Kernel.cpp index 1177f6f1dd..2aa8aa8e99 100644 --- a/src/core/NEON/kernels/NEBox3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEBox3x3Kernel.cpp @@ -21,14 +21,14 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEBox3x3Kernel.h" +#include "src/core/NEON/kernels/NEBox3x3Kernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEBox3x3Kernel.h b/src/core/NEON/kernels/NEBox3x3Kernel.h new file mode 100644 index 0000000000..f6a64a7bb4 --- /dev/null +++ b/src/core/NEON/kernels/NEBox3x3Kernel.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEBOX3x3KERNEL_H +#define ARM_COMPUTE_NEBOX3x3KERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a Box 3x3 filter */ +class NEBox3x3Kernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEBox3x3Kernel"; + } + /** Default constructor */ + NEBox3x3Kernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBox3x3Kernel(const NEBox3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBox3x3Kernel &operator=(const NEBox3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + NEBox3x3Kernel(NEBox3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + NEBox3x3Kernel &operator=(NEBox3x3Kernel &&) = default; + /** Default destructor */ + ~NEBox3x3Kernel() = default; + /** Set the source, destination and border mode of the kernel. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; +}; + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +/** NEON kernel to perform a Box 3x3 filter for FP16 datatype (only declared when FP16 vector arithmetic is available) + */ +class NEBox3x3FP16Kernel : public NEBox3x3Kernel +{ +public: + const char *name() const override + { + return "NEBox3x3FP16Kernel"; + } + /** Default constructor */ + NEBox3x3FP16Kernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBox3x3FP16Kernel(const NEBox3x3FP16Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBox3x3FP16Kernel &operator=(const NEBox3x3FP16Kernel &) = delete; + /** Allow instances of this class to be moved */ + NEBox3x3FP16Kernel(NEBox3x3FP16Kernel &&) = default; + /** Allow instances of this class to be moved */ + NEBox3x3FP16Kernel &operator=(NEBox3x3FP16Kernel &&) = default; + /** Default destructor */ + ~NEBox3x3FP16Kernel() = default; + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; +}; +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +/** Without FP16 vector arithmetic, NEBox3x3FP16Kernel is an alias of the generic @ref NEBox3x3Kernel */ +using NEBox3x3FP16Kernel = NEBox3x3Kernel; +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEBOX3x3KERNEL_H */ diff --git a/src/core/NEON/kernels/NECannyEdgeKernel.cpp b/src/core/NEON/kernels/NECannyEdgeKernel.cpp index da33c1b1ea..7a2bf20c04 100644 --- a/src/core/NEON/kernels/NECannyEdgeKernel.cpp +++ b/src/core/NEON/kernels/NECannyEdgeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h" +#include "src/core/NEON/kernels/NECannyEdgeKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" @@ -41,22 +41,14 @@ #include #include -using namespace arm_compute; - namespace arm_compute { -class Coordinates; -} // namespace arm_compute - namespace { constexpr int NO_EDGE = 0; constexpr int EDGE = 255; constexpr int MAYBE = 127; -} // namespace -namespace -{ inline uint8x8_t phase_quantization(const float32x4x2_t &gx, const float32x4x2_t &gy) { // Constant use for evaluating score1 and score3 @@ -873,6 +865,8 @@ void edge_trace_U8_U8(uint8_t *__restrict input, uint8_t *__restrict output, con } } // namespace +NEGradientKernel::~NEGradientKernel() = default; + NEGradientKernel::NEGradientKernel() : _func(nullptr), _gx(nullptr), _gy(nullptr), _magnitude(nullptr), _phase(nullptr) { @@ -961,6 +955,7 @@ void NEGradientKernel::run(const Window &window, const ThreadInfo &info) gx, gy, magnitude, phase); } +NEEdgeNonMaxSuppressionKernel::~NEEdgeNonMaxSuppressionKernel() = default; NEEdgeNonMaxSuppressionKernel::NEEdgeNonMaxSuppressionKernel() : _func(nullptr), _magnitude(nullptr), _phase(nullptr), _output(nullptr), _lower_thr(0), _upper_thr(0) { @@ -1045,6 +1040,7 @@ void NEEdgeNonMaxSuppressionKernel::run(const Window &window, const ThreadInfo & magnitude, phase, output); } +NEEdgeTraceKernel::~NEEdgeTraceKernel() = default; NEEdgeTraceKernel::NEEdgeTraceKernel() : _input(nullptr), _output(nullptr) { @@ -1123,3 +1119,4 @@ void NEEdgeTraceKernel::run(const Window &window, const ThreadInfo &info) }, input, output); } +} // namespace arm_compute \ No newline at end of file diff --git a/src/core/NEON/kernels/NECannyEdgeKernel.h b/src/core/NEON/kernels/NECannyEdgeKernel.h new file mode 100644 index 0000000000..eff735259d --- /dev/null +++ b/src/core/NEON/kernels/NECannyEdgeKernel.h @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NECANNYEDGEKERNEL_H +#define ARM_COMPUTE_NECANNYEDGEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Computes magnitude and quantised phase from input gradients.
*/ +class NEGradientKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEGradientKernel"; + } + /** Default constructor */ + NEGradientKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGradientKernel(const NEGradientKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGradientKernel &operator=(const NEGradientKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGradientKernel(NEGradientKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGradientKernel &operator=(NEGradientKernel &&) = default; + /** Default destructor */ + ~NEGradientKernel(); + + /** Initialise the kernel's sources, destinations and normalization type. + * + * @note gx, gy and magnitude must all have the same element size (either 16 or 32 bits) + * + * @param[in] gx Source tensor - Gx component. Data type supported: S16/S32. + * @param[in] gy Source tensor - Gy component. Data type supported: same as @p gx. + * @param[out] magnitude Destination tensor - Magnitude. Data type supported: U16 (if the data type of @p gx is S16) / U32 (if the data type of @p gx is S32). + * @param[out] phase Destination tensor - Quantized phase. Data type supported: U8. + * @param[in] norm_type Normalization type. If 1, L1-Norm, otherwise L2-Norm. + */ + virtual void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +protected: + /** Common signature for all the specialised gradient functions + * + * @param[in] gx_ptr Pointer to the first input tensor. + * @param[in] gy_ptr Pointer to the second input tensor.
+ * @param[out] magnitude_ptr Pointer to the first output tensor. + * @param[out] phase_ptr Pointer to the second output tensor. + */ + using GradientFunction = void(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr); + + GradientFunction *_func; /**< Gradient function to use for the particular tensor types passed to configure() */ + const ITensor *_gx; /**< Source tensor - Gx component */ + const ITensor *_gy; /**< Source tensor - Gy component */ + ITensor *_magnitude; /**< Destination tensor - Magnitude */ + ITensor *_phase; /**< Destination tensor - Quantized phase */ +}; + +/** NEON kernel to perform Non-Maxima suppression for Canny Edge. + * + * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input + * to characterize points as possible edges. Thus, at the end, each point will be set to EDGE, NO_EDGE or MAYBE. + * + * @note Hysteresis is computed in @ref NEEdgeTraceKernel + */ +class NEEdgeNonMaxSuppressionKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEEdgeNonMaxSuppressionKernel"; + } + /** Default constructor */ + NEEdgeNonMaxSuppressionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEdgeNonMaxSuppressionKernel(const NEEdgeNonMaxSuppressionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEdgeNonMaxSuppressionKernel &operator=(const NEEdgeNonMaxSuppressionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEEdgeNonMaxSuppressionKernel(NEEdgeNonMaxSuppressionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEEdgeNonMaxSuppressionKernel &operator=(NEEdgeNonMaxSuppressionKernel &&) = default; + /** Default destructor */ + ~NEEdgeNonMaxSuppressionKernel(); + + /** Initialise the kernel's sources,
destination and border mode. + * + * @param[in] magnitude Source tensor - Magnitude. Data type supported: U16/U32. + * @param[in] phase Source tensor - Quantized phase. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. It will be filled with 0 for "no edge", 127 for "maybe", 255 for "edge" + * @param[in] upper_thr Upper threshold used for the hysteresis + * @param[in] lower_thr Lower threshold used for the hysteresis + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *magnitude, const ITensor *phase, ITensor *output, int32_t upper_thr, int32_t lower_thr, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + /** Common signature for all the specialised non-maxima suppression functions + * + * @param[in] magnitude_ptr Pointer to the first input tensor. + * @param[in] phase_ptr Pointer to the second input tensor. 
+ * @param[out] output_ptr Pointer to the output tensor + * @param[in] stride_mag Stride of the magnitude tensor + * @param[in] upper_thr Upper threshold used for the hysteresis + * @param[in] lower_thr Lower threshold used for the hysteresis + */ + using EdgeNonMaxSupprFunction = void(const void *__restrict magnitude_ptr, const void *__restrict phase_ptr, void *__restrict output_ptr, const uint32_t stride_mag, const int32_t upper_thr, + const int32_t lower_thr); + + EdgeNonMaxSupprFunction *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */ + const ITensor *_magnitude; /**< Source tensor - Magnitude */ + const ITensor *_phase; /**< Source tensor - Quantized phase */ + ITensor *_output; /**< Destination tensor */ + int32_t _lower_thr; /**< Lower threshold used for the hysteresis */ + int32_t _upper_thr; /**< Upper threshold used for the hysteresis */ +}; + +/** NEON kernel to perform Edge tracing */ +class NEEdgeTraceKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEEdgeTraceKernel"; + } + /** Default constructor */ + NEEdgeTraceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEdgeTraceKernel(const NEEdgeTraceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEdgeTraceKernel &operator=(const NEEdgeTraceKernel &) = delete; + /** Allow instances of this class to be moved */ + NEEdgeTraceKernel(NEEdgeTraceKernel &&) = default; + /** Allow instances of this class to be moved */ + NEEdgeTraceKernel &operator=(NEEdgeTraceKernel &&) = default; + /** Default destructor */ + ~NEEdgeTraceKernel(); + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in,out] input Source tensor. Data type supported: U8. Must contain 0 for "no edge", 127 for "maybe", 255 for "edge" + * @param[in,out] output Destination tensor. 
Data type supported: U8. Must be initialized to 0 (No edge). + */ + void configure(ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + bool is_parallelisable() const override; + +private: + ITensor *_input; /**< Source tensor */ + ITensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NECANNYEDGEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEChannelCombineKernel.cpp b/src/core/NEON/kernels/NEChannelCombineKernel.cpp index 7bd380831b..6bfd4c5bda 100644 --- a/src/core/NEON/kernels/NEChannelCombineKernel.cpp +++ b/src/core/NEON/kernels/NEChannelCombineKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEChannelCombineKernel.h" +#include "src/core/NEON/kernels/NEChannelCombineKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEChannelCombineKernel.h b/src/core/NEON/kernels/NEChannelCombineKernel.h new file mode 100644 index 0000000000..a3372be4d2 --- /dev/null +++ b/src/core/NEON/kernels/NEChannelCombineKernel.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H +#define ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +#include +#include + +namespace arm_compute +{ +class IMultiImage; +class ITensor; +using IImage = ITensor; + +/** Interface for the channel combine kernel */ +class NEChannelCombineKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEChannelCombineKernel"; + } + /** Default constructor */ + NEChannelCombineKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEChannelCombineKernel(const NEChannelCombineKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEChannelCombineKernel &operator=(const NEChannelCombineKernel &) = delete; + /** Allow instances of this class to be moved */ + NEChannelCombineKernel(NEChannelCombineKernel &&) = default; + /** Allow instances of this class to be moved */ + NEChannelCombineKernel &operator=(NEChannelCombineKernel &&) = default; + /** Default destructor */ + ~NEChannelCombineKernel() = default; + + /** Configure function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 + * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 + * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 + * @param[in] plane3 The 2D plane that forms channel 3.
Data type supported: U8 + * @param[out] output The single planar output tensor. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 + */ + void configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output); + /** Configure function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 + * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 + * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 + * @param[out] output The multi planar output tensor. Formats supported: NV12/NV21/IYUV/YUV444 + */ + void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + bool is_parallelisable() const override; + +private: + /** Combine 3 planes to form a three channel single plane tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void combine_3C(const Window &win); + /** Combine 4 planes to form a four channel single plane tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void combine_4C(const Window &win); + /** Combine 3 planes to form a single plane YUV tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + template + void combine_YUV_1p(const Window &win); + /** Combine 3 planes to form a two plane YUV tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void combine_YUV_2p(const Window &win); + /** Combine 3 planes to form a three plane YUV tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void combine_YUV_3p(const Window &win); + /** Copies a full plane to the output tensor (NOTE(review): @p plane_id is undocumented; presumably it selects the plane to copy - confirm against the .cpp). + * + * @param[in] win Region on which to execute the kernel.
+ */ + void copy_plane(const Window &win, uint32_t plane_id); + /** Common signature for all the specialised ChannelCombine functions + * + * @param[in] window Region on which to execute the kernel. + */ + using ChannelCombineFunction = void (NEChannelCombineKernel::*)(const Window &window); + /** ChannelCombine function to use for the particular tensor types passed to configure() */ + ChannelCombineFunction _func; + std::array _planes; + ITensor *_output; + IMultiImage *_output_multi; + std::array _x_subsampling; + std::array _y_subsampling; + unsigned int _num_elems_processed_per_iteration; + bool _is_parallelizable; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEChannelExtractKernel.cpp b/src/core/NEON/kernels/NEChannelExtractKernel.cpp index 86245acd05..d0d1c6852f 100644 --- a/src/core/NEON/kernels/NEChannelExtractKernel.cpp +++ b/src/core/NEON/kernels/NEChannelExtractKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEChannelExtractKernel.h" +#include "src/core/NEON/kernels/NEChannelExtractKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" @@ -29,11 +29,11 @@ #include "arm_compute/core/IMultiImage.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/MultiImageInfo.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEChannelExtractKernel.h b/src/core/NEON/kernels/NEChannelExtractKernel.h new file mode 100644 index 0000000000..0b2847d79c --- /dev/null +++ b/src/core/NEON/kernels/NEChannelExtractKernel.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H +#define ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class IMultiImage; +class ITensor; +using IImage = ITensor; + +/** Interface for the channel extract kernel */ +class NEChannelExtractKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEChannelExtractKernel"; + } + /** Default constructor */ + NEChannelExtractKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEChannelExtractKernel(const NEChannelExtractKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEChannelExtractKernel &operator=(const NEChannelExtractKernel &) = delete; + /** Allow instances of this class to be moved */ + NEChannelExtractKernel(NEChannelExtractKernel &&) = default; + /** Allow instances of this class to be moved */ + NEChannelExtractKernel &operator=(NEChannelExtractKernel &&) = default; + /** Default destructor */ + ~NEChannelExtractKernel() = default; + + /** Set the input and output of the kernel + * + * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 + * @param[in] channel Channel to extract. + * @param[out] output Destination tensor. Format supported: U8 + */ + void configure(const ITensor *input, Channel channel, ITensor *output); + /** Set the input and output of the kernel + * + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444 + * @param[in] channel Channel to extract. + * @param[out] output Single-planar destination image.
Format supported: U8 + */ + void configure(const IMultiImage *input, Channel channel, IImage *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Extract one channel from a two channel planar tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void extract_1C_from_2C_img(const Window &win); + /** Extract one channel from a three channel planar tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void extract_1C_from_3C_img(const Window &win); + /** Extract one channel from a four channel planar tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void extract_1C_from_4C_img(const Window &win); + /** Extract U/V channel from a single planar YUYV/UYVY tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void extract_YUYV_uv(const Window &win); + /** Copies a full plane to the output tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void copy_plane(const Window &win); + /** Common signature for all the specialised ChannelExtract functions + * + * @param[in] window Region on which to execute the kernel. + */ + using ChannelExtractFunction = void (NEChannelExtractKernel::*)(const Window &window); + /** ChannelExtract function to use for the particular tensor types passed to configure() */ + ChannelExtractFunction _func; + unsigned int _lut_index; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H */ diff --git a/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp index 6d04d71534..6e16f24956 100644 --- a/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp +++ b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h" +#include "src/core/NEON/kernels/NEChannelShuffleLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEChannelShuffleLayerKernel.h b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.h new file mode 100644 index 0000000000..c7d09df08e --- /dev/null +++ b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H +#define ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the channel shuffle kernel */ +class NEChannelShuffleLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEChannelShuffleLayerKernel"; + } + /** Default constructor */ + NEChannelShuffleLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEChannelShuffleLayerKernel(const NEChannelShuffleLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEChannelShuffleLayerKernel &operator=(const NEChannelShuffleLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEChannelShuffleLayerKernel(NEChannelShuffleLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEChannelShuffleLayerKernel &operator=(NEChannelShuffleLayerKernel &&) = default; + /** Default destructor */ + ~NEChannelShuffleLayerKernel() = default; + /** Configure function's inputs and outputs. + * + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor. Data type supported: Same as @p input + * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. + */ + void configure(const ITensor *input, ITensor *output, unsigned int num_groups); + /** Static function to check if given info will lead to a valid configuration of @ref NEChannelShuffleLayerKernel + * + * @param[in] input Input tensor. Data types supported: All + * @param[in] output Output tensor. Data type supported: Same as @p input + * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; + unsigned int _num_groups; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NECol2ImKernel.cpp b/src/core/NEON/kernels/NECol2ImKernel.cpp index f3192370a6..97b68d1321 100644 --- a/src/core/NEON/kernels/NECol2ImKernel.cpp +++ b/src/core/NEON/kernels/NECol2ImKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" +#include "src/core/NEON/kernels/NECol2ImKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NECol2ImKernel.h b/src/core/NEON/kernels/NECol2ImKernel.h new file mode 100644 index 0000000000..59d1d741b6 --- /dev/null +++ b/src/core/NEON/kernels/NECol2ImKernel.h @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NECOL2IMKERNEL_H +#define ARM_COMPUTE_NECOL2IMKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +#include "arm_compute/core/Size2D.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform col2im reshaping. + * + * Rearranges each matrix column into image blocks. It's the inverse operation of @ref NEIm2ColKernel. + * + * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3: + * + * @f[ + * \left( \begin{array}{ccccccccc} + * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccc} + * a0 & a1 & a2 \\ + * a3 & a4 & a5 \\ + * a6 & a7 & a8 \\ + * \end{array} \right) + * @f] + */ +class NECol2ImKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NECol2ImKernel"; + } + /** Default constructor */ + NECol2ImKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECol2ImKernel(const NECol2ImKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECol2ImKernel &operator=(const NECol2ImKernel &) = delete; + /** Allow instances of this class to be moved */ + NECol2ImKernel(NECol2ImKernel &&) = default; + /** Allow instances of this class to be moved */ + NECol2ImKernel &operator=(NECol2ImKernel &&) = default; + /** Default destructor */ + ~NECol2ImKernel() = default; + + /** Set the input and output of the kernel.
+ * + * @param[in] input The input tensor to convert. Data types supported: All + * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], + * while the rest represent batch of outputs. Data types supported: Same as @p input + * @param[in] convolved_dims Output convolved dimensions. + */ + void configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims); + /** Static function to check if given info will lead to a valid configuration of @ref NECol2ImKernel + * + * @param[in] input The input tensor to convert. Data types supported: All + * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], + * while the rest represent batch of outputs. Data types supported: Same as @p input + * @param[in] convolved_dims Output convolved dimensions. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Template function to run the col2im + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void run_col2im(const Window &window); + + /** Common signature for all the specialised col2im functions + * + * @param[in] window Region on which to execute the kernel.
+ */ + using Col2ImFunctionPtr = void (NECol2ImKernel::*)(const Window &window); + + Col2ImFunctionPtr _func; /**< Pointer to a specialised col2im member function (see Col2ImFunctionPtr) */ + const ITensor *_input; + ITensor *_output; + Size2D _convolved_dims; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NECOL2IMKERNEL_H */ diff --git a/src/core/NEON/kernels/NEColorConvertKernel.cpp b/src/core/NEON/kernels/NEColorConvertKernel.cpp index f933a2a898..23270d42d1 100644 --- a/src/core/NEON/kernels/NEColorConvertKernel.cpp +++ b/src/core/NEON/kernels/NEColorConvertKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEColorConvertKernel.h" +#include "src/core/NEON/kernels/NEColorConvertKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEColorConvertKernel.h b/src/core/NEON/kernels/NEColorConvertKernel.h new file mode 100644 index 0000000000..1adb624aae --- /dev/null +++ b/src/core/NEON/kernels/NEColorConvertKernel.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_COLORCONVERTKERNEL_H +#define ARM_COMPUTE_COLORCONVERTKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class IMultiImage; +class ITensor; +using IImage = ITensor; + +/** Interface for the color convert kernel */ +class NEColorConvertKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEColorConvertKernel"; + } + /** Default constructor */ + NEColorConvertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEColorConvertKernel(const NEColorConvertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEColorConvertKernel &operator=(const NEColorConvertKernel &) = delete; + /** Allow instances of this class to be moved */ + NEColorConvertKernel(NEColorConvertKernel &&) = default; + /** Allow instances of this class to be moved */ + NEColorConvertKernel &operator=(NEColorConvertKernel &&) = default; + /** Default destructor */ + ~NEColorConvertKernel() = default; + + /** Set the input and output of the kernel + * + * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 + * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), + * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888), + * U8 (if the format of @p input is RGB888) + */ + void configure(const ITensor *input, ITensor *output); + /** Set the input and output of the kernel + * + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV + * @param[out] output Single-planar destination image.
Formats supported: RGB888/RGBA8888 + */ + void configure(const IMultiImage *input, IImage *output); + /** Set the input and output of the kernel + * + * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 + * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGBA8888) + */ + void configure(const IImage *input, IMultiImage *output); + /** Set the input and output of the kernel + * + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV + * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) + */ + void configure(const IMultiImage *input, IMultiImage *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + using ColorConvertFunction = void(const void *__restrict input_ptr, void *__restrict output_ptr, const Window &win); + const void *_input; + void *_output; + ColorConvertFunction *_func; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_COLORCONVERTKERNEL_H */ diff --git a/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp b/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp index 8716cfd9b5..597c283a9c 100644 --- a/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp +++ b/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" +#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Types.h" diff --git a/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h b/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h new file mode 100644 index 0000000000..766ee8858a --- /dev/null +++ b/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H +#define ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface to convert the 2D Fully Connected weights from NCHW to NHWC or vice versa. + * + * @note This function can be applied to the 2D weights used by a Fully Connected layer if: + * - It follows a Convolution layer + * - The data layout used by the network does not match the one the model has been trained in. + * + * @note This function assumes the weights are already reshaped (transposed) + */ +class NEConvertFullyConnectedWeightsKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEConvertFullyConnectedWeightsKernel"; + } + /** Default constructor */ + NEConvertFullyConnectedWeightsKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvertFullyConnectedWeightsKernel(const NEConvertFullyConnectedWeightsKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvertFullyConnectedWeightsKernel &operator=(const NEConvertFullyConnectedWeightsKernel &) = delete; + /** Allow instances of this class to be moved */ + NEConvertFullyConnectedWeightsKernel(NEConvertFullyConnectedWeightsKernel &&) = default; + /** Allow instances of this class to be moved */ + NEConvertFullyConnectedWeightsKernel &operator=(NEConvertFullyConnectedWeightsKernel &&) = default; + /** Default destructor */ + ~NEConvertFullyConnectedWeightsKernel() = default; + /** Set the input and output tensor. + * + * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. + * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. + * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). 
+ * @param[in] data_layout The data layout the weights have been trained in. + */ + void configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout); + /** Static function to check if given info will lead to a valid configuration of @ref NEConvertFullyConnectedWeightsKernel + * + * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All. + * @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input. + * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). + * @param[in] data_layout The data layout the weights have been trained in. + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Template function to run the permute + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). 
+ */ + template + void run_convert_fc_weights(const Window &window); + + const ITensor *_input; + ITensor *_output; + unsigned int _factor1; /* equals to the number of elements per original input plane if @p data_layout == NCHW; its number of channels otherwise */ + unsigned int _factor2; /* equals to the number of elements per original input plane if @p data_layout == NHWC; its number of channels otherwise */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H */ diff --git a/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp b/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp index bd8ea30fb3..1f2170f42a 100644 --- a/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp +++ b/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h b/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h new file mode 100644 index 0000000000..2f80361ba5 --- /dev/null +++ b/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H +#define ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** NEON kernel to convert asymmetric signed to asymmetric unsigned and vice-versa */ +class NEConvertQuantizedSignednessKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEConvertQuantizedSignednessKernel"; + } + /** Default constructor */ + NEConvertQuantizedSignednessKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NEConvertQuantizedSignednessKernel(const NEConvertQuantizedSignednessKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers).
*/ + NEConvertQuantizedSignednessKernel &operator=(const NEConvertQuantizedSignednessKernel &) = delete; + /** Allow instances of this class to be moved */ + NEConvertQuantizedSignednessKernel(NEConvertQuantizedSignednessKernel &&) = default; + /** Allow instances of this class to be moved */ + NEConvertQuantizedSignednessKernel &operator=(NEConvertQuantizedSignednessKernel &&) = default; + /** Default destructor */ + ~NEConvertQuantizedSignednessKernel() = default; + /** Initialize the kernel's input, output. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. Data types supported: opposite of @p input. + */ + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEConvertQuantizedSignednessKernel + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED. + * @param[in] output Destination tensor. Data types supported: opposite of @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H */ diff --git a/src/core/NEON/kernels/NEConvolutionKernel.cpp b/src/core/NEON/kernels/NEConvolutionKernel.cpp index 69b65b2816..bac27430f9 100644 --- a/src/core/NEON/kernels/NEConvolutionKernel.cpp +++ b/src/core/NEON/kernels/NEConvolutionKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h" +#include "src/core/NEON/kernels/NEConvolutionKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NEConvolutionKernel.h b/src/core/NEON/kernels/NEConvolutionKernel.h new file mode 100644 index 0000000000..b8bf1d169e --- /dev/null +++ b/src/core/NEON/kernels/NEConvolutionKernel.h @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NECONVOLUTIONKERNEL_H +#define ARM_COMPUTE_NECONVOLUTIONKERNEL_H + +#include "src/core/NEON/INEKernel.h" +#include "src/core/NEON/INESimpleKernel.h" + +#include +#include +#include + +namespace arm_compute +{ +class ITensor; + +/****************************************************************************************\ + * Square Convolution * +\****************************************************************************************/ + +/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9). + * The client can supply a convolution matrix \f$ C_{m,n} \f$. + * @f{eqnarray}{ + * k_0 &=& \frac{m}{2} \\ + * l_0 &=& \frac{n}{2} \\ + * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l} + * @f} + * + * @note The above equation for this function is similar to the default OpenCV Filter2D function, + * which actually computes a correlation and not a convolution. + * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically. + */ +template +class NEConvolutionKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEConvolutionKernel"; + } + /** Default constructor */ + NEConvolutionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NEConvolutionKernel(const NEConvolutionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NEConvolutionKernel &operator=(const NEConvolutionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEConvolutionKernel(NEConvolutionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEConvolutionKernel &operator=(NEConvolutionKernel &&) = default; + /** Default destructor */ + ~NEConvolutionKernel() = default; + /** Initialise the kernel's input, output and border mode. 
+ * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + template + void convolution(const Window &win); + +protected: + uint32_t _scale; /**< scale of the convolution */ + std::array _convolution; /**< convolution matrix */ +}; + +/** Interface for the kernel which applied a 3x3 convolution to a tensor.*/ +using NEConvolution3x3Kernel = NEConvolutionKernel<3>; +/** Interface for the kernel which applied a 5x5 convolution to a tensor.*/ +using NEConvolution5x5Kernel = NEConvolutionKernel<5>; +/** Interface for the kernel which applied a 7x7 convolution to a tensor.*/ +using NEConvolution7x7Kernel = NEConvolutionKernel<7>; +///** Interface for the kernel which applied a 9x9 convolution to a tensor.*/ +using NEConvolution9x9Kernel = NEConvolutionKernel<9>; + +/****************************************************************************************\ + * Separable Square Convolution * +\****************************************************************************************/ + +/** Kernel for the Horizontal pass of a Separable Convolution */ +template +class NESeparableConvolutionHorKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NESeparableConvolutionHorKernel"; + } + /** Default constructor */ + NESeparableConvolutionHorKernel(); + 
/** Prevent instances of this class from being copied (As this class contains pointers). */ + NESeparableConvolutionHorKernel(const NESeparableConvolutionHorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NESeparableConvolutionHorKernel &operator=(const NESeparableConvolutionHorKernel &) = delete; + /** Allow instances of this class to be moved */ + NESeparableConvolutionHorKernel(NESeparableConvolutionHorKernel &&) = default; + /** Allow instances of this class to be moved */ + NESeparableConvolutionHorKernel &operator=(NESeparableConvolutionHorKernel &&) = default; + /** Default destructor */ + ~NESeparableConvolutionHorKernel() = default; + + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data types supported: U16, S16, S32. + * @param[in] conv_row Convolution matrix to apply to the input tensor. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, const int16_t *conv_row, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + /** Apply the object's convolution to the given window of the input tensor.. + * + * @param[in] window Window to apply the convolution on. 
+ */ + template + void convolve(const Window &window); + + std::array _conv_row; /**< Convolution coefficients */ + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel which applied a 5x1 horizontal convolution to a tensor.*/ +using NESeparableConvolution5x5HorKernel = NESeparableConvolutionHorKernel<5>; +/** Interface for the kernel which applied a 7x1 horizontal convolution to a tensor.*/ +using NESeparableConvolution7x7HorKernel = NESeparableConvolutionHorKernel<7>; +/** Interface for the kernel which applied a 9x1 horizontal convolution to a tensor.*/ +using NESeparableConvolution9x9HorKernel = NESeparableConvolutionHorKernel<9>; + +/** Kernel for the Vertical pass of a Separable Convolution */ +template +class NESeparableConvolutionVertKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NESeparableConvolutionVertKernel"; + } + /** Default constructor */ + NESeparableConvolutionVertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NESeparableConvolutionVertKernel(const NESeparableConvolutionVertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NESeparableConvolutionVertKernel &operator=(const NESeparableConvolutionVertKernel &) = delete; + /** Allow instances of this class to be moved */ + NESeparableConvolutionVertKernel(NESeparableConvolutionVertKernel &&) = default; + /** Allow instances of this class to be moved */ + NESeparableConvolutionVertKernel &operator=(NESeparableConvolutionVertKernel &&) = default; + /** Default destructor */ + ~NESeparableConvolutionVertKernel() = default; + + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data type supported: U16, S16, S32. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv_col Convolution matrix to apply to the input tensor. 
+ * @param[in] scale Scale of the convolution matrix + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, const int16_t *conv_col, uint32_t scale, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + /** Apply the object's convolution to the given window of the input tensor. + * This function is used if the intermediate values have been stored as U16. + * + * @param[in] win Window to apply the convolution on. + */ + template + void convolution_u16(const Window &win); + /** Apply the object's convolution to the given window of the input tensor. + * This function is used if the intermediate values have been stored as S16. + * + * @param[in] win Window to apply the convolution on. + */ + template + void convolution_s16(const Window &win); + /** Apply the object's convolution to the given window of the input tensor. + * This function is used if the intermediate values have been stored as S32. + * + * @param[in] win Window to apply the convolution on. 
+ */ + template + void convolution_s32(const Window &win); + + std::array _conv_col; /**< Convolution coefficients */ + uint32_t _scale; /**< Convolution's scale */ +}; + +/** Interface for the kernel which applied a 1x5 vertical convolution to a tensor.*/ +using NESeparableConvolution5x5VertKernel = NESeparableConvolutionVertKernel<5>; +/** Interface for the kernel which applied a 1x7 vertical convolution to a tensor.*/ +using NESeparableConvolution7x7VertKernel = NESeparableConvolutionVertKernel<7>; +/** Interface for the kernel which applied a 1x9 vertical convolution to a tensor.*/ +using NESeparableConvolution9x9VertKernel = NESeparableConvolutionVertKernel<9>; + +/****************************************************************************************\ + * Rectangle Convolution * +\****************************************************************************************/ + +/** Kernel for the running convolution on a rectangle matrix. + * + * @note Supports combinations of 3,5,7 and 9. + */ +class NEConvolutionRectangleKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEConvolutionRectangleKernel"; + } + /** Default constructor */ + NEConvolutionRectangleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &) = delete; + /** Allow instances of this class to be moved */ + NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &&) = default; + /** Allow instances of this class to be moved */ + NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &&) = default; + /** Default destructor */ + ~NEConvolutionRectangleKernel() = default; + /** Initialise the kernel's input, output and border mode. 
+ * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] width Width of convolution matrix (Number of columns) + * @param[in] height Height of convolution matrix (Number of rows) + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + unsigned int get_index(uint32_t val); + /** Apply the object's convolution to the given window of the input tensor. + * + * @param[in] win Window to apply the convolution on. 
+ */ + template + void convolution(const Window &win); + +protected: + const ITensor *_input; /**< Input tensor */ + ITensor *_output; /**< Output tensor */ + uint32_t _scale; /**< Scale of the convolution */ + std::vector _convolution; /**< Convolution matrix */ + BorderSize _border_size; /**< Calculated border width */ + uint32_t _func_idx; /**< Index used to specify convolution function to be used */ + const static unsigned int _nr_supported_sizes + { + 4 + }; /**< Number of supported permutations */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NECONVOLUTIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NECopyKernel.cpp b/src/core/NEON/kernels/NECopyKernel.cpp index b299957b57..337c44c8eb 100644 --- a/src/core/NEON/kernels/NECopyKernel.cpp +++ b/src/core/NEON/kernels/NECopyKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" +#include "src/core/NEON/kernels/NECopyKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NECopyKernel.h b/src/core/NEON/kernels/NECopyKernel.h new file mode 100644 index 0000000000..62b7b803be --- /dev/null +++ b/src/core/NEON/kernels/NECopyKernel.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NECOPYKERNEL_H +#define ARM_COMPUTE_NECOPYKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a copy between two tensors */ +class NECopyKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NECopyKernel"; + } + /** Default constructor */ + NECopyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NECopyKernel(const NECopyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NECopyKernel &operator=(const NECopyKernel &) = delete; + /** Allow instances of this class to be moved */ + NECopyKernel(NECopyKernel &&) = default; + /** Allow instances of this class to be moved */ + NECopyKernel &operator=(NECopyKernel &&) = default; + /** Default destructor */ + ~NECopyKernel() = default; + /** Initialize the kernel's input, output. + * + * @param[in] input Source tensor. Data types supported: All + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] padding (Optional) Padding to be applied to the input tensor + */ + void configure(const ITensor *input, ITensor *output, const PaddingList &padding = PaddingList()); + /** Static function to check if given info will lead to a valid configuration of @ref NECopyKernel + * + * @param[in] input Source tensor. 
Data types supported: All + * @param[in] output Destination tensor. Data types supported: same as @p input. + * @param[in] padding (Optional) Padding to be applied to the input tensor + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding = PaddingList()); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; + PaddingList _padding; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NECOPYKERNEL_H */ diff --git a/src/core/NEON/kernels/NECropKernel.cpp b/src/core/NEON/kernels/NECropKernel.cpp index 5fb55d95a9..c94cdaed22 100644 --- a/src/core/NEON/kernels/NECropKernel.cpp +++ b/src/core/NEON/kernels/NECropKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NECropKernel.h" +#include "src/core/NEON/kernels/NECropKernel.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NECropKernel.h b/src/core/NEON/kernels/NECropKernel.h new file mode 100644 index 0000000000..742215e22b --- /dev/null +++ b/src/core/NEON/kernels/NECropKernel.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEON_CROP_KERNEL_H +#define ARM_COMPUTE_NEON_CROP_KERNEL_H + +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the kernel to perform tensor cropping */ +class NECropKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NECropKernel"; + } + /** Default constructor */ + NECropKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECropKernel(const NECropKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECropKernel &operator=(const NECropKernel &) = delete; + /** Allow instances of this class to be moved */ + NECropKernel(NECropKernel &&) = default; + /** Allow instances of this class to be moved */ + NECropKernel &operator=(NECropKernel &&) = default; + /** Default destructor */ + ~NECropKernel() = default; + /** Configure kernel + * + * @note Supported tensor rank: up to 4 + * @note Padding not supported. + * + * @param[in] input Source tensor. Data type supported: U8/U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC. + * @param[in] crop_boxes Tensor containing all possible boxes used to crop the image, each represented by 4 normalized values. + * Data type supported: F32 + * @param[in] box_ind One dimensional tensor mapping the @p crop_box_ind to the index of the 3D image in @p input. + * Data type supported: F32 + * @param[out] output Destination tensor. Data type supported: F32 + * @param[in] crop_box_ind Index of the crop box to be used from @p crop_boxes. Default is 0. + * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
+ */ + void configure(const ITensor *input, const ITensor *crop_boxes, const ITensor *box_ind, ITensor *output, uint32_t crop_box_ind = 0, float extrapolation_value = 0); + + /** Static function to check if given info will lead to a valid configuration of @ref NECropKernel + * + * @note Supported tensor rank: up to 4 + * @note Padding not supported. + * + * @param[in] input Source tensor info. Data type supported: U8/U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC. + * @param[in] crop_boxes Tensor info for tensor containing all possible boxes used to crop the image. Data type supported: F32 + * @param[in] box_ind Tensor info for the one dimensional tensor mapping the @p crop_box_ind to the index of the 3D image + * in @p input. Data type supported: F32 + * @param[in] output Destination tensor. Data type supported: F32 + * @param[in] crop_box_ind Index of the crop box to be used from @p crop_boxes. Default is 0. + * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *crop_boxes, const ITensorInfo *box_ind, const ITensorInfo *output, uint32_t crop_box_ind = 0, float extrapolation_value = 0); + + /** Configure output tensor's shape as this can only be determined at runtime. */ + void configure_output_shape(); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + + /** Function to use for in bounds crop for the particular tensor types passed to configure() */ + using InBoundsCropFunction = void(const ITensor *, const ITensor *, float *, Coordinates, int32_t, int32_t, int32_t, bool, bool); + +private: + const ITensor *_input; + const ITensor *_crop_boxes; + const ITensor *_box_ind; + ITensor *_output; + + Coordinates _start; + Coordinates _end; + uint32_t _crop_box_ind; + float _extrapolation_value; + /** The number of rows out of bounds at the start and end of output.
*/ + std::array _rows_out_of_bounds; + /** The number of columns out of bounds at the start and end of output. */ + std::array _cols_out_of_bounds; + + NECropKernel::InBoundsCropFunction *_in_bounds_crop_function; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEON_CROP_KERNEL_H */ diff --git a/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp b/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp index 5628802783..58a9a2f1fb 100644 --- a/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp +++ b/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h" +#include "src/core/NEON/kernels/NECumulativeDistributionKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NECumulativeDistributionKernel.h b/src/core/NEON/kernels/NECumulativeDistributionKernel.h new file mode 100644 index 0000000000..1f8c65b5fa --- /dev/null +++ b/src/core/NEON/kernels/NECumulativeDistributionKernel.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H +#define ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +#include + +namespace arm_compute +{ +class IDistribution1D; +class ILut; +class ITensor; +using IImage = ITensor; + +/** Interface for the cumulative distribution (cumulative summation) calculation kernel. + * + * This kernel calculates the cumulative sum of a given distribution (meaning that each output element + * is the sum of all its previous elements including itself) and creates a lookup table with the normalized + * pixel intensities which is used to improve the contrast of the image.
+ */ +class NECumulativeDistributionKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NECumulativeDistributionKernel"; + } + /** Default constructor */ + NECumulativeDistributionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECumulativeDistributionKernel(const NECumulativeDistributionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECumulativeDistributionKernel &operator=(const NECumulativeDistributionKernel &) = delete; + /** Allow instances of this class to be moved */ + NECumulativeDistributionKernel(NECumulativeDistributionKernel &&) = default; + /** Allow instances of this class to be moved */ + NECumulativeDistributionKernel &operator=(NECumulativeDistributionKernel &&) = default; + /** Default destructor */ + ~NECumulativeDistributionKernel() = default; + /** Set the input and output distribution. + * + * @param[in] input Input image. Data type supported: U8 + * @param[in] distribution Unnormalized 256-bin distribution of the input image. + * @param[out] cumulative_sum Cumulative distribution (Summed histogram). Should be same size as @p distribution. + * @param[out] output Equalization lookup table. Should consist of 256 entries of U8 elements. + */ + void configure(const IImage *input, const IDistribution1D *distribution, IDistribution1D *cumulative_sum, ILut *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + bool is_parallelisable() const override; + +private: + const IImage *_input; /**< Input image. */ + const IDistribution1D *_distribution; /**< Input histogram of the input image. */ + IDistribution1D *_cumulative_sum; /**< The cumulative distribution. */ + ILut *_output; /**< Output with the equalization lookup table. */ +private: + static const uint32_t _histogram_size = 256; /**< Default histogram size of 256.
*/ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp index b500268477..ba90bfcd4f 100644 --- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h new file mode 100644 index 0000000000..02c5479f93 --- /dev/null +++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H +#define ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the depth concatenate kernel. + * The input tensor will be concatenated into the output tensor. + */ +class NEDepthConcatenateLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEDepthConcatenateLayerKernel"; + } + /** Default constructor */ + NEDepthConcatenateLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthConcatenateLayerKernel(const NEDepthConcatenateLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthConcatenateLayerKernel &operator=(const NEDepthConcatenateLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEDepthConcatenateLayerKernel(NEDepthConcatenateLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEDepthConcatenateLayerKernel &operator=(NEDepthConcatenateLayerKernel &&) = default; + /** Default destructor */ + ~NEDepthConcatenateLayerKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] depth_offset The offset on the Z axis. + * @param[in,out] output Output tensor info. Data types supported: Same as @p input. + * + * @note: The output tensor's low two dimensions can't be smaller than the input one's. 
+ * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. + * + */ + void configure(const ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConcatenateLayerKernel + * + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] depth_offset The offset on the Z axis. + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + +private: + using DepthConcatFunction = void(const ITensor *in, ITensor *out, unsigned int depth_offset, const Window &window); + +private: + DepthConcatFunction *_func; + unsigned int _depth_offset; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp index 259ece7c6f..d6c89a4553 100644 --- a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h" +#include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEDepthConvertLayerKernel.h b/src/core/NEON/kernels/NEDepthConvertLayerKernel.h new file mode 100644 index 0000000000..30fe1ed2e6 --- /dev/null +++ b/src/core/NEON/kernels/NEDepthConvertLayerKernel.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_DEPTHCONVERTKERNEL_H +#define ARM_COMPUTE_DEPTHCONVERTKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Depth conversion kernel + * This function ignores the scale and zeroPoint of quantized tensors, i.e. QASYMM8 input is treated as uint8 values.
+ */ +class NEDepthConvertLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEDepthConvertLayerKernel"; + } + /** Default constructor*/ + NEDepthConvertLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthConvertLayerKernel(const NEDepthConvertLayerKernel &) = delete; + /** Default move constructor */ + NEDepthConvertLayerKernel(NEDepthConvertLayerKernel &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthConvertLayerKernel &operator=(const NEDepthConvertLayerKernel &) = delete; + /** Default move assignment operator */ + NEDepthConvertLayerKernel &operator=(NEDepthConvertLayerKernel &&) = default; + /** Default destructor */ + ~NEDepthConvertLayerKernel() = default; + /** Set the input and output of the kernel + * + * Valid conversions Input -> Output : + * + * - QASYMM8_SIGNED -> S16, S32, F32, F16 + * - QASYMM8 -> U16, S16, S32, F32, F16 + * - U8 -> U16, S16, S32, F32, F16 + * - U16 -> U8, U32 + * - S16 -> QASYMM8_SIGNED, U8, S32 + * - BFLOAT16 -> F32 + * - F16 -> QASYMM8_SIGNED, QASYMM8, F32, S32, U8 + * - S32 -> QASYMM8_SIGNED, QASYMM8, F16, F32, U8 + * - F32 -> QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8 + * + * @param[in] input The input tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/BFLOAT16/F16/F32. + * @param[out] output The output tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/BFLOAT16/F16/F32. + * @param[in] policy Conversion policy. + * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8. + */ + void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0); + /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConvertLayerKernel + * + * @param[in] input Source tensor info. 
Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/BFLOAT16/F16/F32. + * @param[in] output Destination tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/BFLOAT16/F16/F32. + * @param[in] policy Conversion policy + * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; + ConvertPolicy _policy; + uint32_t _shift; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_DEPTHCONVERTKERNEL_H */ diff --git a/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp b/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp index 403e7aac9f..6dcc85ec2e 100644 --- a/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h" +#include "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h b/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h new file mode 100644 index 0000000000..7e18dd88b8 --- /dev/null +++ b/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2019-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H +#define ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the depth to space kernel */ +class NEDepthToSpaceLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEDepthToSpaceLayerKernel"; + } + /** Default constructor */ + NEDepthToSpaceLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthToSpaceLayerKernel(const NEDepthToSpaceLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthToSpaceLayerKernel &operator=(const NEDepthToSpaceLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEDepthToSpaceLayerKernel(NEDepthToSpaceLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEDepthToSpaceLayerKernel &operator=(NEDepthToSpaceLayerKernel &&) = default; + /** Default destructor */ + ~NEDepthToSpaceLayerKernel() = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All + * @param[out] output Tensor output. Data types supported: same as @p input + * @param[in] block_shape Block shape x value. + */ + void configure(const ITensor *input, ITensor *output, int32_t block_shape); + /** Static function to check if given info will lead to a valid configuration of @ref NEDepthToSpaceLayerKernel. + * + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All + * @param[in] output Tensor output info. Data types supported: same as @p input + * @param[in] block_shape Block shape value. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; /**< Source tensor */ + ITensor *_output; /**< Destination tensor */ + int32_t _block_shape; /**< Block shape */ + DataLayout _data_layout; /**< Data layout of the operation */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp index 533b374594..6e5322cbc6 100644 --- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h" +#include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h new file mode 100644 index 0000000000..713cdcd9d9 --- /dev/null +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H +#define ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H + +#include "arm_compute/core/utils/misc/Traits.h" +#include "src/core/NEON/INEKernel.h" +#include "support/Requires.h" + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#include +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the kernel to run a depthwise convolution native on a tensor. 
*/ +class NEDepthwiseConvolutionLayerNativeKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEDepthwiseConvolutionLayerNativeKernel"; + } + /** Default constructor */ + NEDepthwiseConvolutionLayerNativeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthwiseConvolutionLayerNativeKernel(const NEDepthwiseConvolutionLayerNativeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthwiseConvolutionLayerNativeKernel &operator=(const NEDepthwiseConvolutionLayerNativeKernel &) = delete; + /** Default Move Constructor. */ + NEDepthwiseConvolutionLayerNativeKernel(NEDepthwiseConvolutionLayerNativeKernel &&) = default; + /** Default move assignment operator */ + NEDepthwiseConvolutionLayerNativeKernel &operator=(NEDepthwiseConvolutionLayerNativeKernel &&) = default; + /** Default destructor */ + ~NEDepthwiseConvolutionLayerNativeKernel() = default; + /** Initialize the function's source, destination and parameters. + * + * @note Supported data layouts: NHWC + * + * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [IFM, W, H]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. + * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. 
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * + */ + void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, + const Size2D &dilation = Size2D(1U, 1U)); + /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerNativeKernel + * + * @note Supported data layouts: NHWC + * + * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor info. This is a 3D tensor with dimensions [IFM, W, H]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. + * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[in] output Destination tensor info. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, + const Size2D &dilation = Size2D(1U, 1U)); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + template + using FloatEnalber = typename std::enable_if::value, int>::type; + + template = 0> + void run_depthwise(const Window &window, bool has_biases); + + template + using Quantized8bitEnalber = typename std::enable_if < std::is_same::value || std::is_same::value, int >::type; + + template = 0> + void run_depthwise(const Window &window, bool has_biases); + + /** Common signature for all the specialised depthwise convolution native functions + * + * @param[in] window Region on which to execute the kernel. + */ + using DepthwiseFunctionPtr = void (NEDepthwiseConvolutionLayerNativeKernel::*)(const Window &window, bool has_biases); + + DepthwiseFunctionPtr _func; + const ITensor *_input; + const ITensor *_weights; + const ITensor *_biases; + ITensor *_output; + PadStrideInfo _conv_info; + unsigned int _depth_multiplier; + Size2D _dilation; + std::vector _output_multiplier; + std::vector _output_shift; + bool _has_biases; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp index 2f3c6f431c..36e9c92c56 100644 --- a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h" +#include "src/core/NEON/kernels/NEDequantizationLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.h b/src/core/NEON/kernels/NEDequantizationLayerKernel.h new file mode 100644 index 0000000000..9cc71922af --- /dev/null +++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H +#define ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the dequantization layer kernel. 
*/ +class NEDequantizationLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEDequantizationLayerKernel"; + } + /** Default constructor */ + NEDequantizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDequantizationLayerKernel(const NEDequantizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDequantizationLayerKernel &operator=(const NEDequantizationLayerKernel &) = delete; + /** Default Move Constructor. */ + NEDequantizationLayerKernel(NEDequantizationLayerKernel &&) = default; + /** Default move assignment operator */ + NEDequantizationLayerKernel &operator=(NEDequantizationLayerKernel &&) = default; + /** Default destructor */ + ~NEDequantizationLayerKernel() = default; + /** Set input, output tensors. + * + * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32. + */ + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEDequantizationLayerKernel + * + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[in] output Output tensor info. Data types supported: F16/F32. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEDerivativeKernel.cpp b/src/core/NEON/kernels/NEDerivativeKernel.cpp index 5d3fc01bd2..8d641a33b9 100644 --- a/src/core/NEON/kernels/NEDerivativeKernel.cpp +++ b/src/core/NEON/kernels/NEDerivativeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h" +#include "src/core/NEON/kernels/NEDerivativeKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEDerivativeKernel.h b/src/core/NEON/kernels/NEDerivativeKernel.h new file mode 100644 index 0000000000..112b2b0b28 --- /dev/null +++ b/src/core/NEON/kernels/NEDerivativeKernel.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEDERIVATIVEKERNEL_H +#define ARM_COMPUTE_NEDERIVATIVEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to run the derivative along the X/Y directions on a tensor. + * + */ +class NEDerivativeKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEDerivativeKernel"; + } + /** Default constructor */ + NEDerivativeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDerivativeKernel(const NEDerivativeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDerivativeKernel &operator=(const NEDerivativeKernel &) = delete; + /** Allow instances of this class to be moved */ + NEDerivativeKernel(NEDerivativeKernel &&) = default; + /** Allow instances of this class to be moved */ + NEDerivativeKernel &operator=(NEDerivativeKernel &&) = default; + /** Default destructor */ + ~NEDerivativeKernel() = default; + /** Initialise the kernel's source, destinations and border mode + * + * @note At least one of @p output_x or @p output_y must be set + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + /** Function to perform derivative along the X direction on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void derivative_x(const Window &window); + /** Function to perform derivative along the Y direction on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void derivative_y(const Window &window); + /** Function to perform derivative along the X and Y direction on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void derivative_xy(const Window &window); + /** Common signature for all the specialised derivative functions + * + * @param[in] window Region on which to execute the kernel. + */ + using DerivativeFunction = void (NEDerivativeKernel::*)(const Window &window); + /** Derivative function to use for the particular tensor types passed to configure() */ + DerivativeFunction _func; + +private: + const ITensor *_input; /**< Input tensor */ + ITensor *_output_x; /**< Output tensor - Derivative along the X direction */ + ITensor *_output_y; /**< Output tensor - Derivative along the Y direction */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEDERIVATIVEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEDilateKernel.cpp b/src/core/NEON/kernels/NEDilateKernel.cpp index cc781c699f..dc9ec22c71 100644 --- a/src/core/NEON/kernels/NEDilateKernel.cpp +++ b/src/core/NEON/kernels/NEDilateKernel.cpp @@ -21,13 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEDilateKernel.h" +#include "src/core/NEON/kernels/NEDilateKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEDilateKernel.h b/src/core/NEON/kernels/NEDilateKernel.h new file mode 100644 index 0000000000..f1d34318ed --- /dev/null +++ b/src/core/NEON/kernels/NEDilateKernel.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEDILATEKERNEL_H +#define ARM_COMPUTE_NEDILATEKERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform boolean image dilation */ +class NEDilateKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEDilateKernel"; + } + /** Default constructor */ + NEDilateKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDilateKernel(const NEDilateKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDilateKernel &operator=(const NEDilateKernel &) = delete; + /** Allow instances of this class to be moved */ + NEDilateKernel(NEDilateKernel &&) = default; + /** Allow instances of this class to be moved */ + NEDilateKernel &operator=(NEDilateKernel &&) = default; + /** Default destructor */ + ~NEDilateKernel() = default; + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8 + * @param[out] output Destination tensor. Data type supported: U8 + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEDILATEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp index 56cd6e62d0..87b9fb1bf1 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" +#include "src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" #include "src/core/NEON/kernels/detail/NEDirectConvolutionDetail.h" #include "src/core/NEON/wrapper/wrapper.h" diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h new file mode 100644 index 0000000000..94c97cf521 --- /dev/null +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H +#define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON interface for Direct Convolution Layer kernel */ +class NEDirectConvolutionLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEDirectConvolutionLayerKernel"; + } + /** Default constructor */ + NEDirectConvolutionLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayerKernel(const NEDirectConvolutionLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayerKernel &operator=(const NEDirectConvolutionLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEDirectConvolutionLayerKernel(NEDirectConvolutionLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEDirectConvolutionLayerKernel &operator=(NEDirectConvolutionLayerKernel &&) = default; + /** Default destructor */ + ~NEDirectConvolutionLayerKernel() = default; + /** Set the input, weights, and output tensors. + * + * @note: DirectConvolution only works in the following configurations: + * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 + * 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3 + * + * @param[in] input The input tensor to convolve.
3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * The 3rd dimension must be the same as the input's volume 3rd dimension. + * Data type supported: Same as @p input. + * @param[out] output Output tensor. + * The 3rd dimension must be equal to the 4th dimension of the @p weights tensor. Data types supported: F16/F32 + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + */ + void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info); + /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerKernel + * + * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * The 3rd dimension must be the same as the input's volume 3rd dimension. + * Data type supported: Same as @p input. + * @param[in] output Output tensor. + * The 3rd dimension must be equal to the 4th dimension of the @p weights tensor. Data types supported: F16/F32 + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + /* Template function for optimized convolution NHWC */ + template <typename T> + void convolve_nhwc_optimized(const Window &window); + + /* Template function for convolution NHWC */ + template <typename T> + void convolve_nhwc(const Window &window); + + const ITensor *_input; + const ITensor *_weights; + ITensor *_output; + PadStrideInfo _conv_info; + BorderSize _border_size; + unsigned int _kernel_size; + unsigned int _num_weight_elems_read_per_row; + unsigned int _num_elems_read_per_iteration; + unsigned int _num_elems_written_per_iteration; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp index abaaf12e92..de5a88e812 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" +#include "src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h new file mode 100644 index 0000000000..b1b88103bf --- /dev/null +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2017-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H +#define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H + +#include "arm_compute/core/KernelDescriptors.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; +/** NEON kernel to accumulate the biases, if provided, or downscale in case of quantized input. + * + * @note We assume bias to be shared + * @note For quantized computations (i.e. @p input of S32 type) the output data type for auto-initialization must be passed as part + * of the @ref DirectConvolutionLayerOutputStageKernelInfo.
+ */ +class NEDirectConvolutionLayerOutputStageKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEDirectConvolutionLayerOutputStageKernel"; + } + /** Default constructor */ + NEDirectConvolutionLayerOutputStageKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayerOutputStageKernel(const NEDirectConvolutionLayerOutputStageKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayerOutputStageKernel &operator=(const NEDirectConvolutionLayerOutputStageKernel &) = delete; + /** Allow instances of this class to be moved */ + NEDirectConvolutionLayerOutputStageKernel(NEDirectConvolutionLayerOutputStageKernel &&) = default; + /** Allow instances of this class to be moved */ + NEDirectConvolutionLayerOutputStageKernel &operator=(NEDirectConvolutionLayerOutputStageKernel &&) = default; + /** Default destructor */ + ~NEDirectConvolutionLayerOutputStageKernel() = default; + /** Set the accumulate buffer and the biases of the kernel. + * + * @param[in, out] input Input to add the bias to. If @p output is not specified then accumulation is done in-place. + * Data type supported: F16/F32/S32 + * @param[in] bias (Optional) The shared bias tensor to add. It must be a 1D tensor. Data type supported: Same as @p input + * @param[out] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr) + * Note that in-place computation is only supported for F16/F32. For S32 this must not be nullptr.
+ * Data type supported: F16/F32 or QASYMM8/QASYMM8_SIGNED if @p input is S32 + * @param[in] info (Optional) DirectConvolutionLayerOutputStageKernel descriptor metadata + */ + void configure(ITensor *input, const ITensor *bias = nullptr, ITensor *output = nullptr, + const DirectConvolutionLayerOutputStageKernelInfo &info = DirectConvolutionLayerOutputStageKernelInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerOutputStageKernel + * + * @param[in] input Input to add the bias to. If @p output is not specified then accumulation is done in-place. + * Data type supported: F16/F32/S32 + * @param[in] bias (Optional) The shared bias tensor to add. It must be a 1D tensor. Data type supported: Same as @p input + * @param[in] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr) + * Note that in-place computation is only supported for F16/F32. For S32 this must not be nullptr.
+ * Data type supported: F16/F32 or QASYMM8/QASYMM8_SIGNED if @p input is S32 + * @param[in] info (Optional) DirectConvolutionLayerOutputStageKernel descriptor metadata + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *bias = nullptr, const ITensorInfo *output = nullptr, + const DirectConvolutionLayerOutputStageKernelInfo &info = DirectConvolutionLayerOutputStageKernelInfo()); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + using OutputStageKernel = void(ITensor *input, const ITensor *bias, const Window &window, ITensor *output, + int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias); + +private: + OutputStageKernel *_func; + ITensor *_input; + const ITensor *_bias; + ITensor *_output; + int _result_fixedpoint_multiplier; + int _result_shift; + int _result_offset_after_shift; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp index efe6161096..bb4e9a67b6 100644 --- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp +++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h" +#include "src/core/NEON/kernels/NEElementwiseOperationKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.h b/src/core/NEON/kernels/NEElementwiseOperationKernel.h new file mode 100644 index 0000000000..b0037d357f --- /dev/null +++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.h @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2018-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H +#define ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for an element-wise operation kernel + * + * Element-wise operation is computed by: + * @f[ output(x,y) = OP(input1(x,y), input2(x,y))@f] + * + */ +class NEElementwiseOperationKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEElementwiseOperationKernel"; + } + /** Default constructor */ + NEElementwiseOperationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseOperationKernel(const NEElementwiseOperationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseOperationKernel &operator=(const NEElementwiseOperationKernel &) = delete; + /** Allow instances of this class to be moved */ + NEElementwiseOperationKernel(NEElementwiseOperationKernel &&) = default; + /** Allow instances of this class to be moved */ + NEElementwiseOperationKernel &operator=(NEElementwiseOperationKernel &&) = default; + /** Default destructor */ + ~NEElementwiseOperationKernel() = default; + + /** Common signature for all the specialised arithmetic functions + * + * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[out] output Output tensor info. Data types supported: Dependent on subclass. + * @param[in] window Region on which to execute the kernel.
+ */ + using ElementwiseFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + +protected: + /** Validate the arguments passed to the kernel + * + * @param[in] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32. + * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. + * @param[in] output Output tensor. Data types supported: Dependent on subclass. + */ + static Status validate_arguments_common(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); + + /** Common configure function for element-wise operators with no additional options (e.g. Min, Max, SquaredDiff) + * + */ + void configure_common(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); + + /** Function to use for the particular tensor types passed to configure() */ + std::function<void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window)> _function; + + const ITensor *_input1; + const ITensor *_input2; + ITensor *_output; +}; + +class NEArithmeticOperationKernel : public NEElementwiseOperationKernel +{ +public: + /** Default constructor */ + NEArithmeticOperationKernel() = default; + + /** Configure kernel + * + * @param[in] op Arithmetic operation to be executed. + * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[out] output Output tensor info. Data types supported: Same as @p input1. + */ + void configure(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); + + /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel + * + * @param[in] op Arithmetic operation to be executed. + * @param[in] input1 First tensor input info.
Data types supported: QASYMM8/S16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * + * @return a Status + */ + static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + +protected: + // Inherited methods overridden: + static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); +}; + +class NEDivisionOperationKernel : public NEArithmeticOperationKernel +{ +public: + /** Default constructor */ + NEDivisionOperationKernel() = default; + + /** Configure kernel + * + * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[out] output Output tensor info. Data types supported: Same as @p input1. + */ + void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); + + /** Static function to check if given info will lead to a valid configuration of @ref NEDivisionOperationKernel + * + * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * + * @return a Status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + +protected: + // Inherited methods overridden: + static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); +}; + +class NEPowerOperationKernel : public NEArithmeticOperationKernel +{ +public: + /** Default constructor */ + NEPowerOperationKernel() = default; + + /** Configure kernel + * + * @param[in] input1 First tensor input info.
Data types supported: F16/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[out] output Output tensor info. Data types supported: Same as @p input1. + */ + void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); + + /** Static function to check if given info will lead to a valid configuration of @ref NEPowerOperationKernel + * + * @param[in] input1 First tensor input info. Data types supported: F16/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * + * @return a Status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + +protected: + // Inherited methods overridden: + static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); +}; + +class NEComparisonOperationKernel : public NEElementwiseOperationKernel +{ +public: + /** Default constructor */ + NEComparisonOperationKernel() = default; + + /** Configure kernel + * + * @param[in] op Comparison operation to be executed. + * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[out] output Output tensor info. Data types supported: U8. + */ + void configure(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); + + /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel + * + * @param[in] op Comparison operation to be executed. + * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: U8. + * + * @return a Status + */ + static Status validate(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + +protected: + // Inherited methods overridden: + static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp b/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp index 8e4b7eda30..d899643fdc 100644 --- a/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp +++ b/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h" +#include "src/core/NEON/kernels/NEElementwiseUnaryKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEElementwiseUnaryKernel.h b/src/core/NEON/kernels/NEElementwiseUnaryKernel.h new file mode 100644 index 0000000000..fcf0aa51c5 --- /dev/null +++ b/src/core/NEON/kernels/NEElementwiseUnaryKernel.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H +#define ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for an element-wise unary operation kernel + * + * Element-wise operation is computed by: + * @f[ output(x) = OP(input(x))@f] + * + */ +class NEElementwiseUnaryKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEElementwiseUnaryKernel"; + } + /** Default constructor */ + NEElementwiseUnaryKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseUnaryKernel(const NEElementwiseUnaryKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseUnaryKernel &operator=(const NEElementwiseUnaryKernel &) = delete; + /** Allow instances of this class to be moved */ + NEElementwiseUnaryKernel(NEElementwiseUnaryKernel &&) = default; + /** Allow instances of this class to be moved */ + NEElementwiseUnaryKernel &operator=(NEElementwiseUnaryKernel &&) = default; + /** Default destructor */ + ~NEElementwiseUnaryKernel() = default; + + /** Function to configure the @ref NEElementwiseUnaryKernel + * + * @param[in] op Element-wise unary operation to be executed. + * @param[in] input First tensor input. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations. + * @param[out] output Output tensor.
Data types supported: Same as @p input. + */ + void configure(ElementWiseUnary op, const ITensor *input, ITensor *output); + + /** Static function to check if given info will lead to a valid configuration of @ref NEElementwiseUnaryKernel + * + * @param[in] op Element-wise unary operation to be executed. + * @param[in] input First tensor input info. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations. + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * + * @return a Status + */ + static Status validate(ElementWiseUnary op, const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialised arithmetic functions + * + * @param[in] window Region on which to execute the kernel. + */ + using ElementwiseUnaryPtr = void (NEElementwiseUnaryKernel::*)(const Window &window); + + /** Template function to run elementwise unary operation + * + * @tparam ScalarType Scalar datatype + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template <typename ScalarType> + void elementwise_op(const Window &window); + + ElementwiseUnaryPtr _func; + const ITensor *_input; + ITensor *_output; + ElementWiseUnary _op; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H */ diff --git a/src/core/NEON/kernels/NEErodeKernel.cpp b/src/core/NEON/kernels/NEErodeKernel.cpp index 31b0f487d6..171a6c828f 100644 --- a/src/core/NEON/kernels/NEErodeKernel.cpp +++ b/src/core/NEON/kernels/NEErodeKernel.cpp @@ -21,13 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEErodeKernel.h" +#include "src/core/NEON/kernels/NEErodeKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEErodeKernel.h b/src/core/NEON/kernels/NEErodeKernel.h new file mode 100644 index 0000000000..54f286780b --- /dev/null +++ b/src/core/NEON/kernels/NEErodeKernel.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEERODEKERNEL_H +#define ARM_COMPUTE_NEERODEKERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform boolean image erosion */ +class NEErodeKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEErodeKernel"; + } + /** Default constructor */ + NEErodeKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEErodeKernel(const NEErodeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEErodeKernel &operator=(const NEErodeKernel &) = delete; + /** Allow instances of this class to be moved */ + NEErodeKernel(NEErodeKernel &&) = default; + /** Allow instances of this class to be moved */ + NEErodeKernel &operator=(NEErodeKernel &&) = default; + /** Default destructor */ + ~NEErodeKernel() = default; + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8 + * @param[out] output Destination tensor. Data type supported: U8 + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEERODEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp b/src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp index d8036f2f60..200ee6bf88 100644 --- a/src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp +++ b/src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h" +#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" diff --git a/src/core/NEON/kernels/NEFFTDigitReverseKernel.h b/src/core/NEON/kernels/NEFFTDigitReverseKernel.h new file mode 100644 index 0000000000..f436c364b2 --- /dev/null +++ b/src/core/NEON/kernels/NEFFTDigitReverseKernel.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H +#define ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H + +#include "arm_compute/core/KernelDescriptors.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the digit reverse operation kernel.
*/ +class NEFFTDigitReverseKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEFFTDigitReverseKernel"; + } + /** Constructor */ + NEFFTDigitReverseKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFTDigitReverseKernel(const NEFFTDigitReverseKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFTDigitReverseKernel &operator=(const NEFFTDigitReverseKernel &) = delete; + /** Default Move Constructor. */ + NEFFTDigitReverseKernel(NEFFTDigitReverseKernel &&) = default; + /** Default move assignment operator */ + NEFFTDigitReverseKernel &operator=(NEFFTDigitReverseKernel &&) = default; + /** Default destructor */ + ~NEFFTDigitReverseKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor). + * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: 2 (complex tensor). + * @param[in] idx Digit reverse index tensor. Data type supported: U32 + * @param[in] config Kernel configuration. + */ + void configure(const ITensor *input, ITensor *output, const ITensor *idx, const FFTDigitReverseKernelInfo &config); + + /** Static function to check if given info will lead to a valid configuration of @ref NEFFTDigitReverseKernel + * + * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor). + * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: 2 (complex tensor). + * @param[in] idx Digit reverse index tensor info.
Data type supported: U32 + * @param[in] config Kernel configuration + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + using NEFFTDigitReverseKernelFunctionPtr = void (NEFFTDigitReverseKernel::*)(const Window &window); + + template <bool is_input_complex, bool is_conj> + void digit_reverse_kernel_axis_0(const Window &window); + + template <bool is_input_complex, bool is_conj> + void digit_reverse_kernel_axis_1(const Window &window); + + NEFFTDigitReverseKernelFunctionPtr _func; + const ITensor *_input; + ITensor *_output; + const ITensor *_idx; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEFFTRadixStageKernel.cpp b/src/core/NEON/kernels/NEFFTRadixStageKernel.cpp index 1b0af488a2..cb1391ab4e 100644 --- a/src/core/NEON/kernels/NEFFTRadixStageKernel.cpp +++ b/src/core/NEON/kernels/NEFFTRadixStageKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h" +#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" diff --git a/src/core/NEON/kernels/NEFFTRadixStageKernel.h b/src/core/NEON/kernels/NEFFTRadixStageKernel.h new file mode 100644 index 0000000000..8a695b790f --- /dev/null +++ b/src/core/NEON/kernels/NEFFTRadixStageKernel.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2019-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H +#define ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H + +#include "arm_compute/core/KernelDescriptors.h" +#include "src/core/NEON/INEKernel.h" + +#include <arm_neon.h> +#include <set> + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the FFT kernel. */ +class NEFFTRadixStageKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEFFTRadixStageKernel"; + } + /** Constructor */ + NEFFTRadixStageKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFTRadixStageKernel(const NEFFTRadixStageKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFTRadixStageKernel &operator=(const NEFFTRadixStageKernel &) = delete; + /** Default Move Constructor.
*/ + NEFFTRadixStageKernel(NEFFTRadixStageKernel &&) = default; + /** Default move assignment operator */ + NEFFTRadixStageKernel &operator=(NEFFTRadixStageKernel &&) = default; + /** Default destructor */ + ~NEFFTRadixStageKernel() = default; + /** Set the input and output tensors. + * + * @note If the output tensor is nullptr, the FFT will be performed in-place + * + * @param[in,out] input Source tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor). + * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: same as @p input. + * @param[in] config FFT descriptor metadata. + */ + void configure(ITensor *input, ITensor *output, const FFTRadixStageKernelInfo &config); + /** Static function to check if given info will lead to a valid configuration of @ref NEFFTRadixStageKernel + * + * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor). + * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: same as @p input. + * @param[in] config FFT descriptor metadata.
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelInfo &config); + /** Returns the radices that are supported by the FFT kernel + * + * @return A set of supported radices + */ + static std::set<unsigned int> supported_radix(); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + ITensor *_input; + ITensor *_output; + bool _run_in_place; + unsigned int _Nx; + unsigned int _axis; + unsigned int _radix; + + void set_radix_stage_axis0(const FFTRadixStageKernelInfo &config); + void set_radix_stage_axis1(const FFTRadixStageKernelInfo &config); + + using FFTFunctionPointerAxis0 = std::function<void(float *, float *, unsigned int, unsigned int, const float32x2_t &, unsigned int)>; + using FFTFunctionPointerAxis1 = std::function<void(float *, float *, unsigned int, unsigned int, const float32x2_t &, unsigned int, unsigned int, unsigned int)>; + + FFTFunctionPointerAxis0 _func_0; + FFTFunctionPointerAxis1 _func_1; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEFFTScaleKernel.cpp b/src/core/NEON/kernels/NEFFTScaleKernel.cpp index 0cb8b84db8..6dc5541e94 100644 --- a/src/core/NEON/kernels/NEFFTScaleKernel.cpp +++ b/src/core/NEON/kernels/NEFFTScaleKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEFFTScaleKernel.h" +#include "src/core/NEON/kernels/NEFFTScaleKernel.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" diff --git a/src/core/NEON/kernels/NEFFTScaleKernel.h b/src/core/NEON/kernels/NEFFTScaleKernel.h new file mode 100644 index 0000000000..24a19f98ba --- /dev/null +++ b/src/core/NEON/kernels/NEFFTScaleKernel.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2019-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEFFTSCALEKERNEL_H +#define ARM_COMPUTE_NEFFTSCALEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +#include "arm_compute/core/KernelDescriptors.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the inverse fft scale kernel. */ +class NEFFTScaleKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEFFTScaleKernel"; + } + /** Constructor */ + NEFFTScaleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFTScaleKernel(const NEFFTScaleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFTScaleKernel &operator=(const NEFFTScaleKernel &) = delete; + /** Default Move Constructor. 
*/ + NEFFTScaleKernel(NEFFTScaleKernel &&) = default; + /** Default move assignment operator */ + NEFFTScaleKernel &operator=(NEFFTScaleKernel &&) = default; + /** Default destructor */ + ~NEFFTScaleKernel() = default; + /** Set the input and output tensors. + * + * @param[in,out] input Source tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor). + * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: 1 (real tensor) or 2 (complex tensor). + * @param[in] config Kernel configuration + */ + void configure(ITensor *input, ITensor *output, const FFTScaleKernelInfo &config); + /** Static function to check if given info will lead to a valid configuration of @ref NEFFTScaleKernel + * + * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor). + * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: 1 (real tensor) or 2 (complex tensor). + * @param[in] config Kernel configuration + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTScaleKernelInfo &config); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + ITensor *_input; + ITensor *_output; + float _scale; + bool _run_in_place; + bool _is_conj; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEFFTSCALEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEFastCornersKernel.cpp b/src/core/NEON/kernels/NEFastCornersKernel.cpp index 99312f5134..c9280d8dc0 100644 --- a/src/core/NEON/kernels/NEFastCornersKernel.cpp +++ b/src/core/NEON/kernels/NEFastCornersKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h" +#include "src/core/NEON/kernels/NEFastCornersKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NEFastCornersKernel.h b/src/core/NEON/kernels/NEFastCornersKernel.h new file mode 100644 index 0000000000..a4086afb0c --- /dev/null +++ b/src/core/NEON/kernels/NEFastCornersKernel.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEFASTCORNERSKERNEL_H +#define ARM_COMPUTE_NEFASTCORNERSKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** NEON kernel to perform fast corners */ +class NEFastCornersKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEFastCornersKernel"; + } + /** Constructor */ + NEFastCornersKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFastCornersKernel(const NEFastCornersKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFastCornersKernel &operator=(const NEFastCornersKernel &) = delete; + /** Allow instances of this class to be moved */ + NEFastCornersKernel(NEFastCornersKernel &&) = default; + /** Allow instances of this class to be moved */ + NEFastCornersKernel &operator=(NEFastCornersKernel &&) = default; + /** Default destructor */ + ~NEFastCornersKernel() = default; + /** Initialise the kernel. + * + * @param[in] input Source image. Data type supported: U8. + * @param[out] output Output image. Data type supported: U8. + * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. + * @param[in] non_max_suppression True if non-maxima suppression is applied, false otherwise. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */ + void configure(const IImage *input, IImage *output, uint8_t threshold, bool non_max_suppression, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + const IImage *_input; /**< source image */ + IImage *_output; /**< intermediate results */ + uint8_t _threshold; /**< threshold on difference between intensity */ + bool _non_max_suppression; /**< true if non-maxima suppression is applied in the next stage */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEFASTCORNERSKERNEL_H */ diff --git a/src/core/NEON/kernels/NEFillArrayKernel.cpp b/src/core/NEON/kernels/NEFillArrayKernel.cpp index 93798db6c3..e8ae926fbf 100644 --- a/src/core/NEON/kernels/NEFillArrayKernel.cpp +++ b/src/core/NEON/kernels/NEFillArrayKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h" +#include "src/core/NEON/kernels/NEFillArrayKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NEFillArrayKernel.h b/src/core/NEON/kernels/NEFillArrayKernel.h new file mode 100644 index 0000000000..c9841679d1 --- /dev/null +++ b/src/core/NEON/kernels/NEFillArrayKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEFILLARRAYKERNEL_H +#define ARM_COMPUTE_NEFILLARRAYKERNEL_H + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** This kernel adds all texels greater than or equal to the threshold value to the keypoint array.
*/ +class NEFillArrayKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEFillArrayKernel"; + } + /** Default constructor */ + NEFillArrayKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFillArrayKernel(const NEFillArrayKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFillArrayKernel &operator=(const NEFillArrayKernel &) = delete; + /** Allow instances of this class to be moved */ + NEFillArrayKernel(NEFillArrayKernel &&) = default; + /** Allow instances of this class to be moved */ + NEFillArrayKernel &operator=(NEFillArrayKernel &&) = default; + /** Default destructor */ + ~NEFillArrayKernel() = default; + + /** Initialise the kernel. + * + * @param[in] input Source image. Data type supported: U8. + * @param[in] threshold Texels greater than the threshold will be added to the array. + * @param[out] output Arrays of keypoints to store the results. + */ + void configure(const IImage *input, uint8_t threshold, IKeyPointArray *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + bool is_parallelisable() const override; + +private: + const IImage *_input; + IKeyPointArray *_output; + uint8_t _threshold; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEFILLARRAYKERNEL_H*/ diff --git a/src/core/NEON/kernels/NEFillBorderKernel.cpp b/src/core/NEON/kernels/NEFillBorderKernel.cpp index c1dd5cf81f..488079062b 100644 --- a/src/core/NEON/kernels/NEFillBorderKernel.cpp +++ b/src/core/NEON/kernels/NEFillBorderKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" @@ -30,6 +30,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" #include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEFillBorderKernel.h b/src/core/NEON/kernels/NEFillBorderKernel.h new file mode 100644 index 0000000000..65908bebee --- /dev/null +++ b/src/core/NEON/kernels/NEFillBorderKernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEFILLBORDERKERNEL_H +#define ARM_COMPUTE_NEFILLBORDERKERNEL_H + +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the kernel to fill borders */ +class NEFillBorderKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEFillBorderKernel"; + } + /** Default Constructor */ + NEFillBorderKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFillBorderKernel(const NEFillBorderKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFillBorderKernel &operator=(const NEFillBorderKernel &) = delete; + /** Allow instances of this class to be moved */ + NEFillBorderKernel(NEFillBorderKernel &&) = default; + /** Allow instances of this class to be moved */ + NEFillBorderKernel &operator=(NEFillBorderKernel &&) = default; + /** Default destructor */ + ~NEFillBorderKernel() = default; + + /** Initialise the function. + * + * @note This kernel fills the borders within the XY-planes. + * + * @param[in,out] tensor Tensor to process. Data types supported: All. + * @param[in] border_size Size of the border to fill in elements. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ * + */ + void configure(ITensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + void fill_replicate_single_channel(const Window &window); + void fill_constant_value_single_channel(const Window &window); + + ITensor *_tensor; + BorderSize _border_size; + BorderMode _mode; + PixelValue _constant_border_value; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEFILLBORDERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEFlattenLayerKernel.cpp b/src/core/NEON/kernels/NEFlattenLayerKernel.cpp index e6b34b6165..8c0dc10ee8 100644 --- a/src/core/NEON/kernels/NEFlattenLayerKernel.cpp +++ b/src/core/NEON/kernels/NEFlattenLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h" +#include "src/core/NEON/kernels/NEFlattenLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEFlattenLayerKernel.h b/src/core/NEON/kernels/NEFlattenLayerKernel.h new file mode 100644 index 0000000000..5fd5f436b2 --- /dev/null +++ b/src/core/NEON/kernels/NEFlattenLayerKernel.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEFLATTENLAYERKERNEL_H +#define ARM_COMPUTE_NEFLATTENLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the flatten layer kernel.
*/ +class NEFlattenLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEFlattenLayerKernel"; + } + /** Default constructor */ + NEFlattenLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFlattenLayerKernel(const NEFlattenLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFlattenLayerKernel &operator=(const NEFlattenLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEFlattenLayerKernel(NEFlattenLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEFlattenLayerKernel &operator=(NEFlattenLayerKernel &&) = default; + /** Default destructor */ + ~NEFlattenLayerKernel() = default; + + /** Set the input and output of the kernel. + * + * @param[in] input Input tensor to flatten with at least 3 dimensions. + * The dimensions above the third will be interpreted as batches. Data types supported: All + * @param[out] output Output tensor with shape [w*h*d, input_batches] where: + * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input + */ + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEFlattenLayerKernel + * + * @param[in] input Input tensor info to flatten with at least 3 dimensions. + * The dimensions above the third will be interpreted as batches. Data types supported: All + * @param[in] output Output tensor info with shape [w*h*d, input_batches] where: + * w = width input tensor, h = height input tensor and d = depth input tensor.
Data type supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; /**< Input tensor (presumably the one passed to configure(); assignment not visible in this header) */ + ITensor *_output; /**< Output tensor (presumably the one passed to configure(); assignment not visible in this header) */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEFLATTENLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEFloorKernel.cpp b/src/core/NEON/kernels/NEFloorKernel.cpp index 48f964c6a2..2750acdda7 100644 --- a/src/core/NEON/kernels/NEFloorKernel.cpp +++ b/src/core/NEON/kernels/NEFloorKernel.cpp @@ -21,14 +21,14 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEFloorKernel.h" +#include "src/core/NEON/kernels/NEFloorKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Validate.h" #include "src/core/CPP/Validate.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEFloorKernel.h b/src/core/NEON/kernels/NEFloorKernel.h new file mode 100644 index 0000000000..99c016bac5 --- /dev/null +++ b/src/core/NEON/kernels/NEFloorKernel.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2017-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEFLOORKERNEL_H +#define ARM_COMPUTE_NEFLOORKERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a floor operation */ +class NEFloorKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEFloorKernel"; + } + /** Default constructor */ + NEFloorKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFloorKernel(const NEFloorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFloorKernel &operator=(const NEFloorKernel &) = delete; + /** Allow instances of this class to be moved */ + NEFloorKernel(NEFloorKernel &&) = default; + /** Allow instances of this class to be moved */ + NEFloorKernel &operator=(NEFloorKernel &&) = default; + /** Default destructor */ + ~NEFloorKernel() = default; + /** Set the source and destination of the kernel + * + * @param[in] input Source tensor. Data type supported: F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + */ + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEFloorKernel + * + * @param[in] input Source tensor info. Data type supported: F16/F32. + * @param[in] output Destination tensor info.
Data type supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEFLOORKERNEL_H */ diff --git a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp index e353df1c39..99f830fe06 100644 --- a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h" +#include "src/core/NEON/kernels/NEFuseBatchNormalizationKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.h b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.h new file mode 100644 index 0000000000..ee767b01c8 --- /dev/null +++ b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H +#define ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** NEON kernel to fuse the batch normalization node to a preceding convolution node */ +class NEFuseBatchNormalizationKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEFuseBatchNormalizationKernel"; + } + /** Default constructor */ + NEFuseBatchNormalizationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFuseBatchNormalizationKernel(const NEFuseBatchNormalizationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFuseBatchNormalizationKernel &operator=(const NEFuseBatchNormalizationKernel &) = delete; + /** Allow instances of this class to be moved */ + NEFuseBatchNormalizationKernel(NEFuseBatchNormalizationKernel &&) = default; + /** Allow instances of this class to be moved */ + NEFuseBatchNormalizationKernel &operator=(NEFuseBatchNormalizationKernel &&) = default; + /** Default destructor */ + ~NEFuseBatchNormalizationKernel() = default; + /** Set the source and destination of the kernel + * + * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32.
Data layout supported: NCHW, NHWC + * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights + * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights + * @param[out] fused_weights (Optional) Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights + * @param[out] fused_bias (Optional) Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights + * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights + * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights + * @note if nullptr, bn_beta is set to 0.0 + * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights + * @note if nullptr, bn_gamma is set to 1.0 + * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. + * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. + */ + void configure(const ITensor *input_weights, const ITensor *bn_mean, const ITensor *bn_var, ITensor *fused_weights, ITensor *fused_bias, + const ITensor *input_bias = nullptr, const ITensor *bn_beta = nullptr, const ITensor *bn_gamma = nullptr, + float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); + /** Static function to check if given info will lead to a valid configuration of @ref NEFuseBatchNormalizationKernel + * + * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32.
Data layout supported: NCHW, NHWC + * @param[in] bn_mean Batch normalization layer mean tensor info. Same as @p input_weights + * @param[in] bn_var Batch normalization layer variance tensor info. Same as @p input_weights + * @param[in] fused_weights (Optional) Output fused weights tensor info. It can be a nullptr in case of in-place computation. Same as @p input_weights + * @param[in] fused_bias (Optional) Output fused bias tensor info. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights + * @param[in] input_bias (Optional) Input bias tensor info for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights + * @param[in] bn_beta (Optional) Batch normalization layer beta tensor info. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights + * @note if nullptr, bn_beta is set to 0.0 + * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor info. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights + * @note if nullptr, bn_gamma is set to 1.0 + * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. + * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION.
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var, + const ITensorInfo *fused_weights, const ITensorInfo *fused_bias, + const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr, + float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input_weights; + const ITensor *_input_bias; + const ITensor *_bn_mean; + const ITensor *_bn_var; + const ITensor *_bn_gamma; + const ITensor *_bn_beta; + ITensor *_fused_weights; + ITensor *_fused_bias; + float _epsilon; + bool _run_in_place_weights; + bool _run_in_place_bias; + + using FuseBatchNormFunction = void(const ITensor *input_weights, const ITensor *input_bias, ITensor *fused_weights, ITensor *fused_bias, + const ITensor *bn_mean, const ITensor *bn_var, const ITensor *bn_beta, const ITensor *bn_gamma, float epsilon, const Window &window); /**< Function type (free-function signature) that _func points to */ + + FuseBatchNormFunction *_func; /**< Pointer to a function of type FuseBatchNormFunction; where it is assigned and invoked is not visible in this header */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h b/src/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h new file mode 100644 index 0000000000..775a2c06ab --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2017-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEGEMMASSEMBLYBASE_H +#define ARM_COMPUTE_NEGEMMASSEMBLYBASE_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Base class for GEMM NEON kernels implemented in Assembly.
*/ +class NEGEMMAssemblyBaseKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEGEMMAssemblyBaseKernel"; + } + /** Constructor */ + NEGEMMAssemblyBaseKernel() + : _input0(nullptr), _input1(nullptr), _output(nullptr), _workspace(nullptr), _alpha(1.f), _beta(0.f), _is_transposed_0(false), _is_transposed_1(false) + { + } + + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMAssemblyBaseKernel(const NEGEMMAssemblyBaseKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMAssemblyBaseKernel &operator=(const NEGEMMAssemblyBaseKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMAssemblyBaseKernel(NEGEMMAssemblyBaseKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMAssemblyBaseKernel &operator=(NEGEMMAssemblyBaseKernel &&) = default; + + virtual ~NEGEMMAssemblyBaseKernel() = default; /**< Virtual destructor: this class declares a pure virtual function and is meant to be derived from */ + + /** Initialise the kernel's input and output. + * + * The computed function is C = alpha * A x B + beta * C. + * + * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F32 + * @param[in] input1 Input tensor containing the Matrix B. Data types supported: same as @p input0 + * @param[in,out] output Output tensor to store the result of matrix multiplication. If @p beta is not zero the values are multiplied by @p beta before the result is accumulated. Otherwise the values are overwritten by the result. Data types supported: same as @p input0. + * @param[out] workspace Space for intermediate results. + * @param[in] alpha (Optional) Weight of the matrix product. Defaults to 1.f + * @param[in] beta (Optional) Weight of the accumulation. Defaults to 0.f + * @param[in] is_transposed_0 (Optional) True if @p input0 is transposed else false. 
(Defaults to false) + * @param[in] is_transposed_1 (Optional) True if @p input1 is transposed else false.
(Defaults to false) + */ + void configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha = 1.f, float beta = 0.f, bool is_transposed_0 = false, bool is_transposed_1 = false) + { + internal_configure(input0, input1, output, workspace, alpha, beta, is_transposed_0, is_transposed_1); + } + +protected: + virtual void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool is_transposed_0, bool is_transposed_1) = 0; /**< Hook called by configure() with the same arguments; pure virtual, so every concrete kernel must implement it. Parameter names deliberately carry no leading underscore so they do not shadow the data members below */ + + const ITensor *_input0; + const ITensor *_input1; + ITensor *_output; + ITensor *_workspace; + float _alpha; + float _beta; + bool _is_transposed_0; + bool _is_transposed_1; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEGEMMASSEMBLYBASE_H*/ diff --git a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp index 2997c1d003..5d178ea85b 100644 --- a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp +++ b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp @@ -21,16 +21,16 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h new file mode 100644 index 0000000000..85939ebae9 --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H +#define ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to interleave the elements of a matrix + * + * This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values) + * + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{cccccccccccccccc} + * a00 & a10 & a20 & a30 & a01 & a11 & a21 & a31 & a02 & a12 & a22 & a32 & a03 & a13 & a23 & a33 \\ + * \end{array} \right) + * @f] + * + * After this operation, the output matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ] + */ +class NEGEMMInterleave4x4Kernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEGEMMInterleave4x4Kernel"; + } + /** Constructor */ + NEGEMMInterleave4x4Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMInterleave4x4Kernel(const NEGEMMInterleave4x4Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMInterleave4x4Kernel &operator=(const NEGEMMInterleave4x4Kernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMInterleave4x4Kernel(NEGEMMInterleave4x4Kernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMInterleave4x4Kernel &operator=(NEGEMMInterleave4x4Kernel &&) = default; + /** Default destructor */ + ~NEGEMMInterleave4x4Kernel() = default; + /**
Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor which stores the interleaved matrix. Data type supported: same as @p input. + */ + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMInterleave4x4Kernel + * + * @param[in] input Input tensor info. Data types supported: All + * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Template function to run gemm interleave 4x4 + * + * @tparam ScalarType Scalar datatype + * + * @param[in] input Input tensor. Data types supported: uint32_t, uint16_t and uint8_t + * @param[out] output Output tensor. Data types supported: uint32_t, uint16_t and uint8_t + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template <typename ScalarType> + void gemm_interleave4x4(const ITensor *input, ITensor *output, const Window &window); + + /** Common signature for all the specialised gemm interleave 4x4 functions + * + * @param[in] input Input tensor. Data types supported: uint32_t, uint16_t and uint8_t + * @param[out] output Output tensor. Data types supported: uint32_t, uint16_t and uint8_t + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ */ + using GEMMInterleaveFunctionFuncPtr = void (NEGEMMInterleave4x4Kernel::*)(const ITensor *input, ITensor *output, const Window &window); + + GEMMInterleaveFunctionFuncPtr _func; /**< Member-function pointer with the common signature above; presumably bound to a gemm_interleave4x4 specialisation (assignment not visible in this header) */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H*/ diff --git a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp index acc519012b..4dbfc3b022 100644 --- a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h new file mode 100644 index 0000000000..14d03fe3eb --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H +#define ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to multiply matrices + * + * @note @ref NEGEMMLowpMatrixMultiplyKernel low precision matrix product kernel + * This kernel performs the following computation: + * + * -# Convert a values from int8 to int32 + * -# Convert b values from int8 to int32 + * -# Compute the int32 matrix product of the resulting a * b and store the result as int32 + * + */ +class NEGEMMLowpMatrixMultiplyKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEGEMMLowpMatrixMultiplyKernel"; + } + /** Constructor */ + NEGEMMLowpMatrixMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpMatrixMultiplyKernel(const NEGEMMLowpMatrixMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpMatrixMultiplyKernel &operator=(const NEGEMMLowpMatrixMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMLowpMatrixMultiplyKernel(NEGEMMLowpMatrixMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMLowpMatrixMultiplyKernel &operator=(NEGEMMLowpMatrixMultiplyKernel &&) = default; + /** Default destructor */ + ~NEGEMMLowpMatrixMultiplyKernel() = default; + /** Initialise
the kernel's input and output. + * + * The input matrices @p input0 and @p input1 must be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel. These two + * kernels change the layout of the original matrices to be more cache-friendly. + * + * @param[in] input0 Input tensor containing the interleaved Matrix A. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED + * @param[in] input1 Input tensor containing the transposed1xW Matrix B. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 + */ + void configure(const ITensor *input0, const ITensor *input1, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixMultiplyKernel + * + * @param[in] input0 Input tensor info containing the interleaved Matrix A. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED + * @param[in] input1 Input tensor info containing the transposed1xW Matrix B. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL + * @param[in] output Output tensor info to store the result of matrix multiplication.
Data type supported: S32 + * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input0; /**< Interleaved Matrix A (presumably the tensor passed to configure(); assignment not visible in this header) */ + const ITensor *_input1; /**< Transposed1xW Matrix B (presumably the tensor passed to configure(); assignment not visible in this header) */ + ITensor *_output; /**< Result tensor (presumably the one passed to configure(); assignment not visible in this header) */ + bool _slide_matrix_b; /**< NOTE(review): meaning is defined in the .cpp; by its name it controls whether Matrix B is advanced along with the window - confirm */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H*/ diff --git a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp index 1c76926546..174a06955f 100644 --- a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h new file mode 100644 index 0000000000..0f37e584b9 --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2017-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H +#define ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel used to add the offset contribution after @ref NEGEMMLowpMatrixMultiplyKernel. The computation is performed in-place + * + * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), + * and adds to it the offset contribution of matrix A and matrix B in-place. 
+ * + * The final result is: + * + * mm_result[i][k] = mm_result[i][k] + + * (vector_sum_col[k] * a_offset) + + * (vector_sum_row[i] * b_offset) + + * (a_offset * b_offset * k) + * + */ +class NEGEMMLowpOffsetContributionKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEGEMMLowpOffsetContributionKernel"; + } + /** Constructor */ + NEGEMMLowpOffsetContributionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpOffsetContributionKernel(const NEGEMMLowpOffsetContributionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpOffsetContributionKernel &operator=(const NEGEMMLowpOffsetContributionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMLowpOffsetContributionKernel(NEGEMMLowpOffsetContributionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMLowpOffsetContributionKernel &operator=(NEGEMMLowpOffsetContributionKernel &&) = default; + /** Default destructor */ + ~NEGEMMLowpOffsetContributionKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in, out] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32 + * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result + * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result + * @param[in] k Number of matrix A columns or Matrix B rows + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B.
+ */ + void configure(ITensor *mm_result, const ITensor *vector_sum_col, const ITensor *vector_sum_row, int32_t k, int32_t a_offset, int32_t b_offset); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpOffsetContributionKernel + * + * @param[in] mm_result Input tensor info containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32 + * @param[in] vector_sum_col Tensor info for the input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result + * @param[in] vector_sum_row Tensor info for the input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + * + * @return a status + */ + static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, int32_t a_offset, int32_t b_offset); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_vector_sum_col; + const ITensor *_vector_sum_row; + ITensor *_mm_result; + int32_t _a_offset; + int32_t _b_offset; + int32_t _k_offset; + bool _slide_vector_sum_col; +}; +} // namespace arm_compute + +#endif /* ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp index 6a7d225167..3c8f5ae022 100644 --- a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h new file mode 100644 index 0000000000..4c68fb0943 --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H +#define ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel used to add the offset contribution and perform the output stage after @ref NEGEMMLowpMatrixMultiplyKernel. + * + * The computation is performed in-place (NOTE(review): configure() takes mm_result as a pointer to const and writes to a separate output tensor, so the in-place wording may be a leftover from NEGEMMLowpOffsetContributionKernel - confirm) + * + * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), + * and adds to it the offset contribution of matrix A and matrix B in-place. + * + * The output stage can perform either QuantizeDownInt32ToUint8Scale or QuantizeDownInt32ToUint8ScaleByFixedPoint for Uint8. + * The output stage can perform either QuantizeDownInt32ToInt8Scale or QuantizeDownInt32ToInt8ScaleByFixedPoint for Int8. + * + * For QuantizeDownInt32ToUint8Scale/QuantizeDownInt32ToInt8Scale the final result is: + * + * ((mm_result'[i][k] + result_offset) * result_mult_int) >> result_shift + * + * For QuantizeDownInt32ToUint8ScaleByFixedPoint/QuantizeDownInt32ToInt8ScaleByFixedPoint the final result is: + * + * (FixedPointMul(mm_result'[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift + * + * where FixedPointMul(x, y) is the nearest integer to the following + * mathematical expression, evaluated without overflow or intermediate rounding: + * + * (x * y) / 2^31 + * + * and mm_result'[i][k] = mm_result[i][k] + + * (vector_sum_col[k] * a_offset) + + * (vector_sum_row[i] * b_offset) + + * (a_offset * b_offset * k) + */ + +class NEGEMMLowpOffsetContributionOutputStageKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEGEMMLowpOffsetContributionOutputStageKernel"; + } + /** Constructor */ + NEGEMMLowpOffsetContributionOutputStageKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpOffsetContributionOutputStageKernel(const
NEGEMMLowpOffsetContributionOutputStageKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpOffsetContributionOutputStageKernel &operator=(const NEGEMMLowpOffsetContributionOutputStageKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMLowpOffsetContributionOutputStageKernel(NEGEMMLowpOffsetContributionOutputStageKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMLowpOffsetContributionOutputStageKernel &operator=(NEGEMMLowpOffsetContributionOutputStageKernel &&) = default; + /** Default destructor */ + ~NEGEMMLowpOffsetContributionOutputStageKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32 + * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result + * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p mm_result. + * @param[out] output Output tensor containing the final quantized result. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] k Number of matrix A columns or Matrix B rows + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + * @param[in] output_stage GEMMLowp output stage info, providing the type of quantization and the necessary parameters.
+ */ + void configure(const ITensor *mm_result, const ITensor *vector_sum_col, const ITensor *vector_sum_row, const ITensor *bias, ITensor *output, int32_t k, int32_t a_offset, int32_t b_offset, + GEMMLowpOutputStageInfo output_stage); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpOffsetContributionOutputStageKernel + * + * @param[in] mm_result Input tensor info containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32 + * @param[in] vector_sum_col Tensor info for the input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result + * @param[in] vector_sum_row Tensor info for the input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result + * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p mm_result. + * @param[in] output Output tensor info containing the final quantized result. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + * @param[in] output_stage GEMMLowp output stage info, providing the type of quantization and the necessary parameters.
+ * + * @return a status + */ + static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, const ITensorInfo *output, int32_t a_offset, + int32_t b_offset, + GEMMLowpOutputStageInfo output_stage); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Kernel state: tensor pointers, matrix offsets and output stage info */ + const ITensor *_vector_sum_col; + const ITensor *_vector_sum_row; + const ITensor *_bias; + const ITensor *_mm_result; + ITensor *_output; + int32_t _a_offset; + int32_t _b_offset; + int32_t _k_offset; + bool _slide_vector_sum_col; + GEMMLowpOutputStageInfo _output_stage; +}; +} // namespace arm_compute + +#endif /* ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp index 659c4105c1..2e78107a1a 100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h new file mode 100644 index 0000000000..42ef570f77 --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H +#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED + * + * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value. 
+ * The following computations will be performed by the kernel: + * + * -# Add offset terms to final result + * -# Multiply each entry of result by result_mult_int + * -# Add bias to final result if bias tensor is not a nullptr + * -# Shift the int32 accumulator by result_shift + * -# Clamp the value between the specified min and max bounds + * -# Clamp the resulting int32 values: + * -# -to the [0..255] range and cast to QASYMM8. + * -# -to the [-128..127] range and cast to QASYMM8_SIGNED. + * + */ +class NEGEMMLowpQuantizeDownInt32ScaleKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEGEMMLowpQuantizeDownInt32ScaleKernel"; + } + /** Constructor */ + NEGEMMLowpQuantizeDownInt32ScaleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpQuantizeDownInt32ScaleKernel(const NEGEMMLowpQuantizeDownInt32ScaleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpQuantizeDownInt32ScaleKernel &operator=(const NEGEMMLowpQuantizeDownInt32ScaleKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMLowpQuantizeDownInt32ScaleKernel(NEGEMMLowpQuantizeDownInt32ScaleKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMLowpQuantizeDownInt32ScaleKernel &operator=(NEGEMMLowpQuantizeDownInt32ScaleKernel &&) = default; + /** Default destructor */ + ~NEGEMMLowpQuantizeDownInt32ScaleKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor.
Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] output_stage GEMMLowp output stage metadata. NOTE(review): passed by pointer and the class declares a pointer member of the same type, so the pointed-to object presumably has to outlive the kernel - confirm against the implementation. + */ + void configure(const ITensor *input, const ITensor *bias, ITensor *output, const GEMMLowpOutputStageInfo *output_stage); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ScaleKernel + * + * @param[in] input Input tensor info. Data type supported: S32 + * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] output Output tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] output_stage GEMMLowp output stage metadata. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Template function to run the NEGEMMLowpQuantizeDownInt32ScaleKernel (NOTE(review): the template parameter list is missing after the template keyword in this copy of the patch; restore it from the original header) + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void run(const Window &window); + + /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ScaleKernel functions + * + * @param[in] window Region on which to execute the kernel.
+ */ + using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ScaleKernel::*)(const Window &window); + + QuantizeDownFunctionPtr _func; + const ITensor *_input; + const ITensor *_bias; + ITensor *_output; + const GEMMLowpOutputStageInfo *_output_stage; + bool _is_bounded_relu; +}; +} // namespace arm_compute + +#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp index afa8cec76f..1fafc62302 100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h new file mode 100644 index 0000000000..d04e713cb8 --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2019-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H +#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QSYMM16 + * + * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QSYMM16 value. 
+ * The following computations will be performed by the kernel: + * + * -# Compute fixed point multiplication of each entry of input by result_fixedpoint_multiplier + * -# Add bias to final result if bias tensor is not a nullptr + * -# Round to nearest division by a power-of-two using result_shift + * -# Clamp the value between the specified min and max bounds + * -# Clamp the resulting int32 values to the [-32768, 32767] range and cast to QSYMM16. + * + */ +class NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel"; + } + /** Constructor */ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &&) = default; + /** Default destructor */ + ~NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM].
Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QSYMM16 + * @param[in] result_fixedpoint_multiplier Fixed point value by which each element of the input matrix is multiplied + * @param[in] result_shift Integer value used to round the result of the fixed point multiplication to the nearest division by a power-of-two + * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0. + * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16. + * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0. + */ + void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel + * + * @param[in] input Input tensor info. Data type supported: S32 + * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor info with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] output Output tensor info. Data type supported: QSYMM16 + * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0. + * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16. + * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Template function to run the NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel (NOTE(review): the template parameter list is missing after the template keyword in this copy of the patch; restore it from the original header) + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void run(const Window &window); + + /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel functions + * + * @param[in] window Region on which to execute the kernel. + */ + using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::*)(const Window &window); + + QuantizeDownFunctionPtr _func; + const ITensor *_input; + const ITensor *_bias; + ITensor *_output; + int _result_fixedpoint_multiplier; + int _result_shift; + int _min; + int _max; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp index 83416e03e9..bf9ce9554d 100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h new file mode 100644 index 0000000000..55c07fbb5a --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H +#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8_SIGNED + * + * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8_SIGNED value. + * The following computations will be performed by the kernel: + * + * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier + * -# Add bias to final result if bias tensor is not a nullptr + * -# Round to nearest division by a power-of-two using result_shift + * -# Add the offset (result_offset_after_shift) to each result + * -# Clamp the value between the specified min and max bounds + * -# Clamp the resulting int32 values to the [-128..127] range and cast to QASYMM8_SIGNED.
+ * + */ +class NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel"; + } + /** Constructor */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(); + /** Prevent instances of this class from being copied (as this class contains pointers) */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &) = delete; + /** Prevent instances of this class from being copy-assigned (as this class contains pointers) */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &&) = default; + /** Allow instances of this class to be move-assigned */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &&) = default; + /** Default destructor */ + ~NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor.
Data type supported: QASYMM8_SIGNED + * @param[in] result_fixedpoint_multiplier Fixed point value by which each element of the input matrix is multiplied (first computation listed in the class description) + * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication + * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED + * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED + * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED. + * Along with @p min, this value can be used to implement "rectified linear unit" activation functions + */ + void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel + * + * @param[in] input Input tensor info. Data type supported: S32 + * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] output Output tensor info.
Data type supported: QASYMM8_SIGNED + * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED + * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED. + * Along with @p min, this value can be used to implement "rectified linear unit" activation functions + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Template function to run the NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void run(const Window &window); + + /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel functions + * + * @param[in] window Region on which to execute the kernel.
+ */ + using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::*)(const Window &window); + + QuantizeDownFunctionPtr _func; + const ITensor *_input; + const ITensor *_bias; + ITensor *_output; + int _result_fixedpoint_multiplier; + int _result_shift; + int _result_offset_after_shift; + int _min; + int _max; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp index 1e8aa0cc0a..cbb56da8c0 100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h new file mode 100644 index 0000000000..1a8de1c441 --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2017-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H +#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8 + * + * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8 value.
+ * The following computations will be performed by the kernel: + * + * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier + * -# Add bias to final result if bias tensor is not a nullptr + * -# Round to nearest division by a power-of-two using result_shift + * -# Add the offset (result_offset_after_shift) to each result + * -# Clamp the value between the specified min and max bounds + * -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8. + * + */ +class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel"; + } + /** Constructor */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(); + /** Prevent instances of this class from being copied (as this class contains pointers) */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &) = delete; + /** Prevent instances of this class from being copy-assigned (as this class contains pointers) */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &&) = default; + /** Allow instances of this class to be move-assigned */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &&) = default; + /** Default destructor */ + ~NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QASYMM8 + * @param[in] result_fixedpoint_multiplier Fixed point value by which each element of the input matrix is multiplied (first computation listed in the class description) + * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication + * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8 + * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8 + * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8. + * Along with @p min, this value can be used to implement "rectified linear unit" activation functions + */ + void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel + * + * @param[in] input Input tensor info. Data type supported: S32 + * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] output Output tensor info.
Data type supported: QASYMM8 + * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8 + * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8. + * Along with @p min, this value can be used to implement "rectified linear unit" activation functions + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Template function to run the NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void run(const Window &window); + + /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel functions + * + * @param[in] window Region on which to execute the kernel. + */ + using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::*)(const Window &window); + + QuantizeDownFunctionPtr _func; + const ITensor *_input; + const ITensor *_bias; + ITensor *_output; + int _result_fixedpoint_multiplier; + int _result_shift; + int _result_offset_after_shift; + int _min; + int _max; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp index 566872f02c..db038e559e 100644 --- a/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/KernelDescriptors.h" diff --git a/src/core/NEON/kernels/NEGEMMLowpReductionKernel.h b/src/core/NEON/kernels/NEGEMMLowpReductionKernel.h new file mode 100644 index 0000000000..655658cb6c --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMLowpReductionKernel.h @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H +#define ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; +struct GEMMLowpReductionKernelInfo; + +/** Common interface for all NEON reduction kernels */ +class INEGEMMLowpReductionKernel : public INEKernel +{ +public: + /** Constructor */ + INEGEMMLowpReductionKernel(); + /** Prevent instances of this class from being copied (as this class contains pointers) */ + INEGEMMLowpReductionKernel(const INEGEMMLowpReductionKernel &) = delete; + /** Prevent instances of this class from being copy-assigned (as this class contains pointers) */ + INEGEMMLowpReductionKernel &operator=(const INEGEMMLowpReductionKernel &) = delete; + /** Allow instances of this class to be moved */ + INEGEMMLowpReductionKernel(INEGEMMLowpReductionKernel &&) = default; + /** Allow instances of this class to be move-assigned */ + INEGEMMLowpReductionKernel &operator=(INEGEMMLowpReductionKernel &&) = default; + /** Default destructor */ + virtual ~INEGEMMLowpReductionKernel() = default; + + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL + * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k Number of matrix columns/rows depending on the type of reduction. + * - is_reshaped True if the matrix has been reshaped. + * - scalar Scalar value to multiply each reduced column/row by. + * - mul_by_scalar True if each reduced column/row must be multiplied by a scalar value.
+ */ + virtual void configure(const ITensor *input, ITensor *output, const GEMMLowpReductionKernelInfo &info) = 0; + +protected: + const ITensor *_input; + ITensor *_output; + int32_t _k; + int32_t _scalar; + bool _mul_by_scalar; +}; + +/** NEON kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A. + * + * @note This stage is needed to handle the offset of matrix product + * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md + */ +class NEGEMMLowpMatrixAReductionKernel : public INEGEMMLowpReductionKernel +{ +public: + const char *name() const override + { + return "NEGEMMLowpMatrixAReductionKernel"; + } + /** Default constructor */ + NEGEMMLowpMatrixAReductionKernel() = default; + /** Prevent instances of this class from being copied */ + NEGEMMLowpMatrixAReductionKernel(const NEGEMMLowpMatrixAReductionKernel &) = delete; + /** Prevent instances of this class from being copy-assigned */ + NEGEMMLowpMatrixAReductionKernel &operator=(const NEGEMMLowpMatrixAReductionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMLowpMatrixAReductionKernel(NEGEMMLowpMatrixAReductionKernel &&) = default; + /** Allow instances of this class to be move-assigned */ + NEGEMMLowpMatrixAReductionKernel &operator=(NEGEMMLowpMatrixAReductionKernel &&) = default; + /** Default destructor */ + ~NEGEMMLowpMatrixAReductionKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL + * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k (num_mtx_a_cols) Number of matrix A columns + * - is_reshaped (is_interleaved4x4) True if the matrix A has been interleaved4x4 + * - scalar Scalar value to multiply each reduced row by. + * - mul_by_scalar True if each reduced row must be multiplied by a scalar value.
+ */ + void configure(const ITensor *mtx_a, ITensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override; + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixAReductionKernel + * + * @param[in] mtx_a Input tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL + * @param[in] vector_sum_row Info of the output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k (num_mtx_a_cols) Number of matrix A columns + * - is_reshaped (is_interleaved4x4) True if the matrix A has been interleaved4x4 + * - scalar Scalar value to multiply each reduced row by. + * - mul_by_scalar True if each reduced row must be multiplied by a scalar value. + * + * @return a status + */ + static Status validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row, const GEMMLowpReductionKernelInfo &info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Execution of the reduction kernel specialized on the input type + * + * @param[in] window Execution window + */ + template + void run_internal(const Window &window); +}; + +/** NEON kernel used to compute the row-vectors of sums of all the entries in each column of Matrix B.
+ * + * @note This stage is needed to handle the offset of matrix product + * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md + */ +class NEGEMMLowpMatrixBReductionKernel : public INEGEMMLowpReductionKernel +{ +public: + const char *name() const override + { + return "NEGEMMLowpMatrixBReductionKernel"; + } + /** Default constructor */ + NEGEMMLowpMatrixBReductionKernel() = default; + /** Prevent instances of this class from being copied (as this class contains pointers) */ + NEGEMMLowpMatrixBReductionKernel(const NEGEMMLowpMatrixBReductionKernel &) = delete; + /** Prevent instances of this class from being copy-assigned (as this class contains pointers) */ + NEGEMMLowpMatrixBReductionKernel &operator=(const NEGEMMLowpMatrixBReductionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMLowpMatrixBReductionKernel(NEGEMMLowpMatrixBReductionKernel &&) = default; + /** Allow instances of this class to be move-assigned */ + NEGEMMLowpMatrixBReductionKernel &operator=(NEGEMMLowpMatrixBReductionKernel &&) = default; + /** Default destructor */ + ~NEGEMMLowpMatrixBReductionKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] mtx_b Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL + * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k (num_mtx_b_rows) Number of matrix B rows. + * - is_reshaped (is_transposed1xW) True if the input tensor is transposed 1xW. + * - scalar Scalar value to multiply each reduced column by. + * - mul_by_scalar True if each reduced column must be multiplied by a scalar value.
+ */ + void configure(const ITensor *mtx_b, ITensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override; + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixBReductionKernel + * + * @param[in] mtx_b Input tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL + * @param[in] vector_sum_col Info of the output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k (num_mtx_b_rows) Number of matrix B rows. + * - is_reshaped (is_transposed1xW) True if the input tensor is transposed 1xW. + * - scalar Scalar value to multiply each reduced column by. + * - mul_by_scalar True if each reduced column must be multiplied by a scalar value. + * + * @return a status + */ + static Status validate(const ITensorInfo *mtx_b, const ITensorInfo *vector_sum_col, const GEMMLowpReductionKernelInfo &info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Execution of the reduction kernel specialized on the input type + * + * @param[in] window Execution window + * @param[in] info Thread-related information + */ + template + void run_internal(const Window &window, const ThreadInfo &info); +}; +} // namespace arm_compute + +#endif /* ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp index 9aee26ca55..6a2802a991 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h new file mode 100644 index 0000000000..48377838d2 --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H +#define ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform the in-place matrix addition between two matrices, taking into account that the second matrix might be weighted by a scalar value beta: + * + * @note [ MTX_OUT = MTX_0 + beta * MTX_1 ] with MTX_0 and MTX_1 of the same size + * + * @note This stage is used to finalize the GEMM result and it is computed if and only if beta != 0.0. In case this kernel is used for finalizing the GEMM result, we have: + * - MTX_0 = A * B * alpha, where MTX_0 is the output of @ref NEGEMMMatrixMultiplyKernel + * - MTX_1 = C + */ +class NEGEMMMatrixAdditionKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEGEMMMatrixAdditionKernel"; + } + /** Constructor */ + NEGEMMMatrixAdditionKernel(); + /** Prevent instances of this class from being copied */ + NEGEMMMatrixAdditionKernel(const NEGEMMMatrixAdditionKernel &) = delete; + /** Prevent instances of this class from being copy-assigned */ + NEGEMMMatrixAdditionKernel &operator=(const NEGEMMMatrixAdditionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMMatrixAdditionKernel(NEGEMMMatrixAdditionKernel &&) = default; + /** Allow instances of this class to be move-assigned */ + NEGEMMMatrixAdditionKernel &operator=(NEGEMMMatrixAdditionKernel &&) = default; + /** Default destructor */ + ~NEGEMMMatrixAdditionKernel() = default; + /** Initialise the kernel's input and output. + * + * @note The input and output tensor must have the same dimensions + * + * @param[in] input Input tensor (Matrix C). Data types supported: F16/F32 + * @param[in,out] output Output tensor. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input.
+ * @param[in] beta Weight of matrix C + */ + void configure(const ITensor *input, ITensor *output, float beta); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixAdditionKernel. + * + * @note The input and output tensor must have the same dimensions + * + * @param[in] input Input tensor info (Matrix C). Data types supported: F16/F32 + * @param[in] output Output tensor info. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input. + * @param[in] beta Weight of matrix C + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the matrix addition functions + * + * @param[in] input An input tensor. Data types supported: F16/F32 + * @param[out] output The output tensor. Data type supported: same as @p input + * @param[in] window Region on which to execute the kernel. + * @param[in] beta Weight of matrix C + */ + using MatrixAdditionFunction = void(const ITensor *input, ITensor *output, const Window &window, float beta); + /** Matrix addition function to use for the particular tensor types passed to configure() */ + MatrixAdditionFunction *_func; + float _beta; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp index a9236890e3..fc95c08f62 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h new file mode 100644 index 0000000000..1ea948de63 --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H +#define ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to multiply two input matrices "A" and "B".
All elements of the output matrix/vector will be multiplied by alpha after the matrix multiplication + * + * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel + * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. The implementation also assumes that both tensors have not been reshaped + * + */ +class NEGEMMMatrixMultiplyKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEGEMMMatrixMultiplyKernel"; + } + /** Constructor */ + NEGEMMMatrixMultiplyKernel(); + /** Prevent instances of this class from being copied (as this class contains pointers) */ + NEGEMMMatrixMultiplyKernel(const NEGEMMMatrixMultiplyKernel &) = delete; + /** Prevent instances of this class from being copy-assigned (as this class contains pointers) */ + NEGEMMMatrixMultiplyKernel &operator=(const NEGEMMMatrixMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMMatrixMultiplyKernel(NEGEMMMatrixMultiplyKernel &&) = default; + /** Allow instances of this class to be move-assigned */ + NEGEMMMatrixMultiplyKernel &operator=(NEGEMMMatrixMultiplyKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @note If the output tensor is a matrix, the input matrices @p input0 and @p input1 should be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel + * These two kernels change the layout of the original matrices to be more cache-friendly. + * + * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32 + * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector.
+ * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. + * @param[in] alpha Weight of the matrix product + * @param[in] is_interleaved True if input0 and input1 have been reshaped respectively using @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel + * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped + */ + void configure(const ITensor *input0, const ITensor *input1, ITensor *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixMultiplyKernel + * + * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32 + * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. + * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 + * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. + * @param[in] alpha Weight of the matrix product + * @param[in] is_interleaved True if input0 and input1 have been reshaped respectively using @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel + * @param[in] reshape_info GEMM reshape info.
If is_interleaved = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped + * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input0; + const ITensor *_input1; + ITensor *_output; + float _alpha; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H*/ diff --git a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp index b9b4fe9e9c..6d9f921b02 100644 --- a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp @@ -21,14 +21,14 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "src/core/AccessWindowStatic.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h new file mode 100644 index 0000000000..7120943a90 --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2016-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H +#define ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** NEON kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor) + * + * The following is an example of how the transposition1xW works when the input data type is F32 + * + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{cccccccccccccccc} + * a00 & a01 & a02 & a03 & a10 & a11 & a12 & a13 & a20 & a21 & a22 & a23 & a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * @f] + * + * The following is an example of how the transposition1xW works when the input data type is F16 + * + * @f[ + * \left( \begin{array}{cccccccc} + * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 \\ + * a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 \\ + * a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 \\ + * a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{cccccccccccccccccccccccccccccccc} + * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 & a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 & a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 & a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37\\ + * \end{array} \right) + * @f] + * + * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor) + * + */ +class NEGEMMTranspose1xWKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEGEMMTranspose1xWKernel"; + } + /** Default constructor */ + NEGEMMTranspose1xWKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ +
NEGEMMTranspose1xWKernel(const NEGEMMTranspose1xWKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMTranspose1xWKernel &operator=(const NEGEMMTranspose1xWKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMTranspose1xWKernel(NEGEMMTranspose1xWKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMTranspose1xWKernel &operator=(NEGEMMTranspose1xWKernel &&) = default; + /** Default destructor */ + ~NEGEMMTranspose1xWKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor. Data type supported: same as @p input. + */ + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMTranspose1xWKernel + * + * @param[in] input Input tensor info. Data types supported: All + * @param[in] output Output tensor info. Data type supported: same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGatherKernel.cpp b/src/core/NEON/kernels/NEGatherKernel.cpp index 193fe98c7b..55ecb8840f 100644 --- a/src/core/NEON/kernels/NEGatherKernel.cpp +++ b/src/core/NEON/kernels/NEGatherKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEGatherKernel.h" +#include "src/core/NEON/kernels/NEGatherKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NEGatherKernel.h b/src/core/NEON/kernels/NEGatherKernel.h new file mode 100644 index 0000000000..d81e34c39c --- /dev/null +++ b/src/core/NEON/kernels/NEGatherKernel.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_NEGATHERKERNEL_H +#define ARM_COMPUTE_NEGATHERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Kernel to perform the gather operation on NEON */ +class NEGatherKernel : public INEKernel +{ +public: + /** Default constructor.
*/ + NEGatherKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NEGatherKernel(const NEGatherKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NEGatherKernel &operator=(const NEGatherKernel &) = delete; + /** Allow instances of this class to be moved. */ + NEGatherKernel(NEGatherKernel &&) = default; + /** Allow instances of this class to be moved. */ + NEGatherKernel &operator=(NEGatherKernel &&) = default; + /** Default destructor */ + ~NEGatherKernel() = default; + + /** Name of the kernel + * + * @return Kernel name + */ + const char *name() const override + { + return "NEGatherKernel"; + } + /** Initialise the kernel's inputs and outputs + * + * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All + * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]) + * @param[out] output Destination tensor. Data type supported: Same as @p input + * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 + */ + void configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis = 0); + /** Static function to check if given info will lead to a valid configuration of @ref NEGatherKernel + * + * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All + * @param[in] indices Indices tensor info. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]) + * @param[in] output Destination tensor info. Data type supported: Same as @p input + * @param[in] axis The axis in @p input to gather @p indices from. Negative values wrap around.
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Implementation of the gather operation for 0 axis. + * + * For gather on the 0 axis an element by element copy is performed. + * + * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window()) + * @param[in] info Info about executing thread and CPU. + */ + template + void gather_0_axis(const Window &window, const ThreadInfo &info); + + /** Implementation of the gather operation for axes other than 0. + * + * For axis >= 1 a row-wise copy takes place. + * + * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window()) + * @param[in] info Info about executing thread and CPU. + */ + template + void gather_n_axis(const Window &window, const ThreadInfo &info); + + using kernel_ptr = void (NEGatherKernel::*)(const Window &window, const ThreadInfo &info); + + const ITensor *_input; + const ITensor *_indices; + int _axis; + ITensor *_output; + kernel_ptr _func; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEGATHERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp b/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp index 5ff5db7266..63b26ab7c0 100644 --- a/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp @@ -21,13 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h" +#include "src/core/NEON/kernels/NEGaussian3x3Kernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEGaussian3x3Kernel.h b/src/core/NEON/kernels/NEGaussian3x3Kernel.h new file mode 100644 index 0000000000..8973b48e7a --- /dev/null +++ b/src/core/NEON/kernels/NEGaussian3x3Kernel.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H +#define ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a Gaussian 3x3 filter */ +class NEGaussian3x3Kernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEGaussian3x3Kernel"; + } + /** Default constructor */ + NEGaussian3x3Kernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussian3x3Kernel(const NEGaussian3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussian3x3Kernel &operator=(const NEGaussian3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + NEGaussian3x3Kernel(NEGaussian3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + NEGaussian3x3Kernel &operator=(NEGaussian3x3Kernel &&) = default; + /** Default destructor */ + ~NEGaussian3x3Kernel() = default; + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8 + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H */ diff --git a/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp b/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp index 5bb3e76ded..ab2feb0dc2 100644 --- a/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp +++ b/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp @@ -21,15 +21,15 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h" +#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -112,6 +112,10 @@ void NEGaussian5x5HorKernel::run(const Window &window, const ThreadInfo &info) input, output); } +NEGaussian5x5VertKernel::NEGaussian5x5VertKernel() +{ +} + BorderSize NEGaussian5x5VertKernel::border_size() const { return BorderSize{ 2, 0 }; diff --git a/src/core/NEON/kernels/NEGaussian5x5Kernel.h b/src/core/NEON/kernels/NEGaussian5x5Kernel.h new file mode 100644 index 0000000000..f4bca55637 --- /dev/null +++ b/src/core/NEON/kernels/NEGaussian5x5Kernel.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2016-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H +#define ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a Gaussian 5x5 filter (horizontal pass) */ +class NEGaussian5x5HorKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEGaussian5x5HorKernel"; + } + /** Default constructor */ + NEGaussian5x5HorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). NOTE(review): the parameter is a non-const reference; confirm this is intended */ + NEGaussian5x5HorKernel(NEGaussian5x5HorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). NOTE(review): the parameter is a non-const reference; confirm this is intended */ + NEGaussian5x5HorKernel &operator=(NEGaussian5x5HorKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGaussian5x5HorKernel(NEGaussian5x5HorKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGaussian5x5HorKernel &operator=(NEGaussian5x5HorKernel &&) = default; + /** Default destructor */ + ~NEGaussian5x5HorKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + BorderSize _border_size; +}; + +/** NEON kernel to perform a Gaussian 5x5 filter (vertical pass) */ +class NEGaussian5x5VertKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEGaussian5x5VertKernel"; + } + /** Default constructor */ + NEGaussian5x5VertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). NOTE(review): the parameter is a non-const reference; confirm this is intended */ + NEGaussian5x5VertKernel(NEGaussian5x5VertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). NOTE(review): the parameter is a non-const reference; confirm this is intended */ + NEGaussian5x5VertKernel &operator=(NEGaussian5x5VertKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGaussian5x5VertKernel(NEGaussian5x5VertKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGaussian5x5VertKernel &operator=(NEGaussian5x5VertKernel &&) = default; + /** Default destructor */ + ~NEGaussian5x5VertKernel() = default; + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data type supported: S16. + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H */ diff --git a/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp b/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp index 62cf414df2..49c8e9ec3e 100644 --- a/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp +++ b/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp @@ -21,17 +21,17 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h" +#include "src/core/NEON/kernels/NEGaussianPyramidKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEGaussianPyramidKernel.h b/src/core/NEON/kernels/NEGaussianPyramidKernel.h new file mode 100644 index 0000000000..e852db2699 --- /dev/null +++ b/src/core/NEON/kernels/NEGaussianPyramidKernel.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2016-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H +#define ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a GaussianPyramid (horizontal pass) */ +class NEGaussianPyramidHorKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEGaussianPyramidHorKernel"; + } + /** Default constructor */ + NEGaussianPyramidHorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). NOTE(review): the parameter is a non-const reference; confirm this is intended */ + NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). NOTE(review): the parameter is a non-const reference; confirm this is intended */ + NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &&) = default; + /** Default destructor */ + ~NEGaussianPyramidHorKernel() = default; + + /** Initialise the kernel's source and destination. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Output should have half the input width. Data type supported: S16.
+ */ + void configure(const ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + int _l2_load_offset; +}; + +/** NEON kernel to perform a GaussianPyramid (vertical pass) */ +class NEGaussianPyramidVertKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEGaussianPyramidVertKernel"; + } + /** Default constructor */ + NEGaussianPyramidVertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). NOTE(review): the parameter is a non-const reference; confirm this is intended */ + NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). NOTE(review): the parameter is a non-const reference; confirm this is intended */ + NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &&) = default; + /** Default destructor */ + ~NEGaussianPyramidVertKernel() = default; + + /** Initialise the kernel's source and destination. + * + * @param[in] input Source tensor. Data type supported: S16. + * @param[out] output Destination tensor. Output should have half the input height. Data type supported: U8.
+ */ + void configure(const ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + int _t2_load_offset; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp index 483f204b04..516a9b68c2 100644 --- a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp +++ b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" +#include "src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" diff --git a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h new file mode 100644 index 0000000000..f6d39e50a7 --- /dev/null +++ b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H +#define ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" +namespace arm_compute +{ +class ITensor; + +/** Interface for Compute All Anchors kernel */ +class NEComputeAllAnchorsKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEComputeAllAnchorsKernel"; + } + + /** Default constructor */ + NEComputeAllAnchorsKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEComputeAllAnchorsKernel(const NEComputeAllAnchorsKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEComputeAllAnchorsKernel &operator=(const NEComputeAllAnchorsKernel &) = delete; + /** Allow instances of this class to be moved */ + NEComputeAllAnchorsKernel(NEComputeAllAnchorsKernel &&) = default; + /** Allow instances of this class to be moved */ + NEComputeAllAnchorsKernel &operator=(NEComputeAllAnchorsKernel &&) = default; + /** Default destructor */ + ~NEComputeAllAnchorsKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 + * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors.
Data types supported: Same as @p anchors + * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo + * + */ + void configure(const ITensor *anchors, ITensor *all_anchors, const ComputeAnchorsInfo &info); + + /** Static function to check if given info will lead to a valid configuration of @ref NEComputeAllAnchorsKernel + * + * @param[in] anchors Source tensor info. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 + * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p anchors + * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo + * + * @return a status + */ + static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + template + void internal_run(const Window &window); + + const ITensor *_anchors; + ITensor *_all_anchors; + ComputeAnchorsInfo _anchors_info; +}; +} // namespace arm_compute +#endif // ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H diff --git a/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp b/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp index 00f4087cbc..089cd34e0c 100644 --- a/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp +++ b/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" +#include "src/core/NEON/kernels/NEHOGDescriptorKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/HOGInfo.h" diff --git a/src/core/NEON/kernels/NEHOGDescriptorKernel.h b/src/core/NEON/kernels/NEHOGDescriptorKernel.h new file mode 100644 index 0000000000..7845bc2cdf --- /dev/null +++ b/src/core/NEON/kernels/NEHOGDescriptorKernel.h @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H +#define ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H + +#include "arm_compute/core/IHOG.h" +#include "arm_compute/core/Size2D.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform HOG Orientation Binning */ +class NEHOGOrientationBinningKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEHOGOrientationBinningKernel"; + } + /** Default constructor */ + NEHOGOrientationBinningKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGOrientationBinningKernel(const NEHOGOrientationBinningKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGOrientationBinningKernel &operator=(const NEHOGOrientationBinningKernel &) = delete; + /** Allow instances of this class to be moved */ + NEHOGOrientationBinningKernel(NEHOGOrientationBinningKernel &&) = default; + /** Allow instances of this class to be moved */ + NEHOGOrientationBinningKernel &operator=(NEHOGOrientationBinningKernel &&) = default; + /** Default destructor */ + ~NEHOGOrientationBinningKernel() = default; + + /** Initialise the kernel's inputs, output and HOG's metadata + * + * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16. + * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8 + * @param[out] output Output tensor which stores the local HOG for each cell. Data type supported: F32. 
Number of channels supported: equal to the number of histogram bins per cell + * @param[in] hog_info HOG's metadata + */ + void configure(const ITensor *input_magnitude, const ITensor *input_phase, ITensor *output, const HOGInfo *hog_info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialised orientation binning functions + * + * @param[in] mag_row_ptr Pointer to the first row of the cell in the magnitude tensor + * @param[in] phase_row_ptr Pointer to the first row of the cell in the phase tensor + * @param[out] output_ptr Pointer to the output cell of hog space tensor + * @param[in] mag_stride Stride of the magnitude tensor + * @param[in] phase_stride Stride of the phase tensor + * @param[in] cell_width Width of the cell + * @param[in] cell_height Height of the cell + * @param[in] num_bins Number of bins for each cell + * @param[in] phase_scale Scale factor to apply to the phase in order to calculate the histogram index + */ + using OrientBinFunc = void(const int16_t *__restrict mag_row_ptr, const uint8_t *__restrict phase_row_ptr, float *__restrict output_ptr, size_t mag_stride, size_t phase_stride, size_t cell_width, + size_t cell_height, size_t num_bins, float phase_scale); + /** Orientation binning function to use for the particular cell width passed to configure() */ + OrientBinFunc *_func; + const ITensor *_input_magnitude; + const ITensor *_input_phase; + ITensor *_output; + size_t _cell_width; + size_t _cell_height; + size_t _num_bins; + float _phase_scale; +}; + +/** NEON kernel to perform HOG block normalization */ +class NEHOGBlockNormalizationKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEHOGBlockNormalizationKernel"; + } + /** Default constructor */ + NEHOGBlockNormalizationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ +
NEHOGBlockNormalizationKernel(const NEHOGBlockNormalizationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGBlockNormalizationKernel &operator=(const NEHOGBlockNormalizationKernel &) = delete; + /** Allow instances of this class to be moved */ + NEHOGBlockNormalizationKernel(NEHOGBlockNormalizationKernel &&) = default; + /** Allow instances of this class to be moved */ + NEHOGBlockNormalizationKernel &operator=(NEHOGBlockNormalizationKernel &&) = default; + /** Default destructor */ + ~NEHOGBlockNormalizationKernel() = default; + + /** Initialise the kernel's input, output and HOG's metadata + * + * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell + * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block + * @param[in] hog_info HOG's metadata + */ + void configure(const ITensor *input, ITensor *output, const HOGInfo *hog_info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialised block normalization functions + * + * @param[in] input_row_ptr Pointer to the first row of the block in the input hog space tensor + * @param[out] output_ptr Pointer to the output block of the hog normalized space + * @param[in] input_stride Stride of the input hog space tensor + * @param[in] num_cells_per_block_height Number of cells per block along the Y direction + * @param[in] num_bins_block_x Number of bins per block along the X direction + * @param[in] num_bins_block Number of total bins per block + * @param[in] l2_hyst_threshold Threshold to use for l2 hysteresis normalization + */ + using BlockNormFunc = void(const float *input_row_ptr, float *output_ptr, size_t 
input_stride, size_t num_cells_per_block_height, size_t num_bins_block_x, size_t num_bins_block, + float l2_hyst_threshold); + /** Block normalization function to use for the particular normalization type passed to configure() */ + BlockNormFunc *_func; + const ITensor *_input; + ITensor *_output; + Size2D _num_cells_per_block; + Size2D _num_cells_per_block_stride; + size_t _num_bins; + float _l2_hyst_threshold; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H */ diff --git a/src/core/NEON/kernels/NEHOGDetectorKernel.cpp b/src/core/NEON/kernels/NEHOGDetectorKernel.cpp index d5dfa4195d..cba1d5538a 100644 --- a/src/core/NEON/kernels/NEHOGDetectorKernel.cpp +++ b/src/core/NEON/kernels/NEHOGDetectorKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h" +#include "src/core/NEON/kernels/NEHOGDetectorKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/HOGInfo.h" diff --git a/src/core/NEON/kernels/NEHOGDetectorKernel.h b/src/core/NEON/kernels/NEHOGDetectorKernel.h new file mode 100644 index 0000000000..45c28099c8 --- /dev/null +++ b/src/core/NEON/kernels/NEHOGDetectorKernel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEHOGDETECTORKERNEL_H +#define ARM_COMPUTE_NEHOGDETECTORKERNEL_H + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/IHOG.h" +#include "src/core/NEON/INEKernel.h" +#include "support/Mutex.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform HOG detector kernel using linear SVM */ +class NEHOGDetectorKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEHOGDetectorKernel"; + } + /** Default constructor */ + NEHOGDetectorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGDetectorKernel(const NEHOGDetectorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGDetectorKernel &operator=(const NEHOGDetectorKernel &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHOGDetectorKernel(NEHOGDetectorKernel &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHOGDetectorKernel &operator=(NEHOGDetectorKernel &&) = delete; + /** Default destructor */ + ~NEHOGDetectorKernel() = default; + + /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect + * + * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref 
NEHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block + * @param[in] hog HOG data object used by @ref NEHOGOrientationBinningKernel and @ref NEHOGBlockNormalizationKernel + * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects + * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. + * It must be multiple of the hog->info()->block_stride() + * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane + * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to + */ + void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, uint16_t idx_class = 0); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + IDetectionWindowArray *_detection_windows; + const float *_hog_descriptor; + float _bias; + float _threshold; + uint16_t _idx_class; + size_t _num_bins_per_descriptor_x; + size_t _num_blocks_per_descriptor_y; + size_t _block_stride_width; + size_t _block_stride_height; + size_t _detection_window_width; + size_t _detection_window_height; + size_t _max_num_detection_windows; + arm_compute::Mutex _mutex; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEHOGDETECTORKERNEL_H */ diff --git a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp index be68b9c44b..4159e434b2 100644 --- a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp +++ b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" +#include "src/core/NEON/kernels/NEHarrisCornersKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NEHarrisCornersKernel.h b/src/core/NEON/kernels/NEHarrisCornersKernel.h new file mode 100644 index 0000000000..4b794107a2 --- /dev/null +++ b/src/core/NEON/kernels/NEHarrisCornersKernel.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEHARRISCORNERSKERNEL_H +#define ARM_COMPUTE_NEHARRISCORNERSKERNEL_H + +#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" +#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" +#include "arm_compute/core/IArray.h" +#include "src/core/NEON/INEKernel.h" + +#include + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** Common interface for all Harris Score kernels */ +class INEHarrisScoreKernel : public INEKernel +{ +public: + /** Default constructor */ + INEHarrisScoreKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INEHarrisScoreKernel(const INEHarrisScoreKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INEHarrisScoreKernel &operator=(const INEHarrisScoreKernel &) = delete; + /** Allow instances of this class to be moved */ + INEHarrisScoreKernel(INEHarrisScoreKernel &&) = default; + /** Allow instances of this class to be moved */ + INEHarrisScoreKernel &operator=(INEHarrisScoreKernel &&) = default; + /** Default destructor */ + ~INEHarrisScoreKernel() = default; + +public: + /** Setup the kernel parameters + * + * @param[in] input1 Source image (gradient X). Data types supported: S16/S32 + * @param[in] input2 Source image (gradient Y). Data types supported: same as @p input1 + * @param[out] output Destination image (harris score). Data types supported: F32 + * @param[in] norm_factor Normalization factor to use according to the gradient size (Must be different from 0) + * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). + * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + virtual void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) = 0; + +protected: + const IImage *_input1; /**< Source image - Gx component */ + const IImage *_input2; /**< Source image - Gy component */ + IImage *_output; /**< Destination image - Harris score */ + float _sensitivity; /**< Sensitivity value */ + float _strength_thresh; /**< Threshold value */ + float _norm_factor; /**< Normalization factor */ + BorderSize _border_size; /**< Border size */ +}; + +/** Template NEON kernel to perform Harris Score. + * The implementation supports 3, 5, and 7 for the block_size + */ +template +class NEHarrisScoreKernel : public INEHarrisScoreKernel +{ +public: + const char *name() const override + { + return "NEHarrisScoreKernel"; + } + /** Default constructor */ + NEHarrisScoreKernel(); + // Inherited methods overridden: + void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) override; + BorderSize border_size() const override; + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialised harris score functions */ + using HarrisScoreFunction = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride, + float norm_factor, float sensitivity, float strength_thresh); + /** Harris Score function to use for the particular image types passed to configure() */ + HarrisScoreFunction *_func; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEHARRISCORNERSKERNEL_H */ diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp index a50712598a..227013a014 100644 --- a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp +++ 
b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h new file mode 100644 index 0000000000..9d100ebff1 --- /dev/null +++ b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H +#define ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the height concatenate kernel. + * The input tensor will be concatenated into the output tensor. + */ +class NEHeightConcatenateLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEHeightConcatenateLayerKernel"; + } + /** Default constructor */ + NEHeightConcatenateLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHeightConcatenateLayerKernel(const NEHeightConcatenateLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHeightConcatenateLayerKernel &operator=(const NEHeightConcatenateLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEHeightConcatenateLayerKernel(NEHeightConcatenateLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEHeightConcatenateLayerKernel &operator=(NEHeightConcatenateLayerKernel &&) = default; + /** Default destructor */ + ~NEHeightConcatenateLayerKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor info. Data types supported: All + * @param[in] height_offset The starting offset on the Y axis for the output tensor. + * @param[in,out] output Output tensor info. Data types supported: Same as @p input. + * + */ + void configure(const ITensorInfo *input, unsigned int height_offset, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEHeightConcatenateLayerKernel + * + * @param[in] input Input tensor info. Data types supported: All + * @param[in] height_offset The starting offset on the Y axis for the output tensor. 
+ * @param[in] output Output tensor info. Data types supported: Same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + +private: + unsigned int _height_offset; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEHistogramKernel.cpp b/src/core/NEON/kernels/NEHistogramKernel.cpp index 12d1bb8e7e..eddc3b29ab 100644 --- a/src/core/NEON/kernels/NEHistogramKernel.cpp +++ b/src/core/NEON/kernels/NEHistogramKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h" +#include "src/core/NEON/kernels/NEHistogramKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEHistogramKernel.h b/src/core/NEON/kernels/NEHistogramKernel.h new file mode 100644 index 0000000000..e14519ce25 --- /dev/null +++ b/src/core/NEON/kernels/NEHistogramKernel.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEHISTOGRAMKERNEL_H +#define ARM_COMPUTE_NEHISTOGRAMKERNEL_H + +#include "src/core/NEON/INEKernel.h" +#include "support/Mutex.h" + +#include +#include + +namespace arm_compute +{ +class IDistribution1D; +class ITensor; +using IImage = ITensor; + +/** Interface for the histogram kernel */ +class NEHistogramKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEHistogramKernel"; + } + /** Default constructor */ + NEHistogramKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHistogramKernel(const NEHistogramKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHistogramKernel &operator=(const NEHistogramKernel &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHistogramKernel(NEHistogramKernel &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHistogramKernel &operator=(NEHistogramKernel &&) = delete; + /** Default destructor */ + ~NEHistogramKernel() = default; + + /** Set the input image and the distribution output. + * + * @param[in] input Source image. Data type supported: U8. + * @param[out] output Destination distribution. + * @param[in,out] local_hist Array that the threads use to save their local histograms. 
+ * Its size should be equal to (number_of_threads * num_bins), + * and the Window::thread_id() is used to determine the part of the array + * used by each thread. + * @param[out] window_lut LUT with pre-calculated possible window values. + * The size of the LUT should be equal to max_range_size and it will be filled + * during the configure stage, while it is re-used in every run, therefore can be + * safely shared among threads. + */ + void configure(const IImage *input, IDistribution1D *output, uint32_t *local_hist, uint32_t *window_lut); + /** Set the input image and the distribution output. + * + * @note Used for histogram of fixed size equal to 256 + * + * @param[in] input Source image. Data type supported: U8. + * @param[out] output Destination distribution which must be of 256 bins. + */ + void configure(const IImage *input, IDistribution1D *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Function to merge multiple partial histograms. + * + * @param[out] global_hist Pointer to the final histogram. + * @param[in] local_hist Pointer to the partial histograms. + * @param[in] bins Number of bins. + */ + void merge_histogram(uint32_t *global_hist, const uint32_t *local_hist, size_t bins); + /** Function to merge multiple minimum values of partial histograms. + * + * @param[out] global_min Pointer to the global min value. + * @param[in] local_min Local min value. + */ + void merge_min(uint8_t *global_min, const uint8_t &local_min); + /** Function to perform histogram on the given window + * + * @param[in] win Region on which to execute the kernel + * @param[in] info Info about the executing thread + */ + void histogram_U8(Window win, const ThreadInfo &info); + /** Function to perform histogram on the given window where histogram is + * of fixed size 256 without ranges and offsets. 
+ * + * @param[in] win Region on which to execute the kernel + * @param[in] info Info about the executing thread + */ + void histogram_fixed_U8(Window win, const ThreadInfo &info); + /** Pre-calculate the pixel windowing for every possible pixel + * + * Calculate (V - offset) * numBins / range where V is every possible pixel value. + * + * @note We currently support U8 image thus possible pixel values are between 0 and 255 + */ + void calculate_window_lut() const; + /** Common signature for all the specialised Histogram functions + * + * @param[in] window Region on which to execute the kernel. + */ + using HistogramFunctionPtr = void (NEHistogramKernel::*)(Window window, const ThreadInfo &info); + + HistogramFunctionPtr _func; ///< Histogram function to use for the particular image types passed to configure() + const IImage *_input; + IDistribution1D *_output; + uint32_t *_local_hist; + uint32_t *_window_lut; + arm_compute::Mutex _hist_mtx; + static constexpr unsigned int _max_range_size{ 256 }; ///< 256 possible pixel values as we handle only U8 images +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEHISTOGRAMKERNEL_H */ diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp index 915ea75431..93bfcc501a 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" +#include "src/core/NEON/kernels/NEIm2ColKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEIm2ColKernel.h b/src/core/NEON/kernels/NEIm2ColKernel.h new file mode 100644 index 0000000000..6c1c631d82 --- /dev/null +++ b/src/core/NEON/kernels/NEIm2ColKernel.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEIM2COLKERNEL_H +#define ARM_COMPUTE_NEIM2COLKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; +class Size2D; + +/** Interface for the im2col reshape kernel. + * + * Rearranges image blocks into columns. It is used to strip out each convolution block to a single column. + * It is used to transform a convolution to a plain matrix multiplication. 
+ * + * For example, taking into account the image below and assuming 3x3 image blocks with a stride of 1, we have: + * + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccc} + * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\ + * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\ + * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\ + * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\ + * \end{array} \right) + * @f] + */ +class NEIm2ColKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEIm2ColKernel"; + } + /** Default constructor */ + NEIm2ColKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEIm2ColKernel(const NEIm2ColKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEIm2ColKernel &operator=(const NEIm2ColKernel &) = delete; + /** Allow instances of this class to be moved */ + NEIm2ColKernel(NEIm2ColKernel &&) = default; + /** Allow instances of this class to be moved */ + NEIm2ColKernel &operator=(NEIm2ColKernel &&) = default; + /** Default destructor */ + ~NEIm2ColKernel() = default; + + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32 + * Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false + * @param[out] output The output tensor. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
+ * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported + */ + void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, + bool has_bias, const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1); + /** Static function to check if given info will lead to a valid configuration of @ref NEIm2ColKernel + * + * @param[in] input Info of the input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32 + * Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false + * @param[in] output Info of the output tensor. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, + bool has_bias, const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Template function to run im2col + * + * @param[in] window Region on which to execute the kernel. 
(Must be a valid region of the window returned by window()). + */ + template + void run_im2col(const Window &window); + + /** Common signature for all the specialised im2col functions + * + * @param[in] window Region on which to execute the kernel. + */ + using Im2ColFunctionPtr = void (NEIm2ColKernel::*)(const Window &window); + + Im2ColFunctionPtr _func; + const ITensor *_input; + ITensor *_output; + std::pair _convolved_dims; + PadStrideInfo _conv_info; + unsigned int _kernel_width; + unsigned int _kernel_height; + bool _has_bias; + Size2D _dilation; + DataLayout _data_layout; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEIM2COLKERNEL_H */ diff --git a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp index 7aa23de6eb..08bf6f0e76 100644 --- a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h" +#include "src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h new file mode 100644 index 0000000000..96c0119719 --- /dev/null +++ b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2019-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H +#define ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; +struct InstanceNormalizationLayerKernelInfo; + +/** Interface for performing an instance normalization */ +class NEInstanceNormalizationLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEInstanceNormalizationLayerKernel"; + } + /** Default constructor */ + NEInstanceNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEInstanceNormalizationLayerKernel(const NEInstanceNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEInstanceNormalizationLayerKernel &operator=(const NEInstanceNormalizationLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEInstanceNormalizationLayerKernel(NEInstanceNormalizationLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEInstanceNormalizationLayerKernel &operator=(NEInstanceNormalizationLayerKernel &&) = default; + /** Default destructor */ + ~NEInstanceNormalizationLayerKernel() = default; + /** Set the input and output tensors. + * + * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW + * In case of @p output tensor = nullptr this tensor will store the result of the normalization. + * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. + * @param[in] info Kernel meta-data descriptor + */ + void configure(ITensor *input, ITensor *output, const InstanceNormalizationLayerKernelInfo &info); + + /** Static function to check if given info will lead to a valid configuration of @ref NEInstanceNormalizationLayerKernel. + * + * @param[in] input Source tensor info. Data types supported: F16/F32. 
Data layout supported: NCHW + * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. + * @param[in] info Kernel meta-data descriptor + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialized instance normalization functions + * + * @param[in, out] input An input tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization. + * @param[out] output The output tensor. + * @param[in] gamma The scale scalar value applied to the normalized tensor. Defaults to 1.0 + * @param[in] beta The offset scalar value applied to the normalized tensor. Defaults to 0.0 + * @param[in] epsilon Lower bound value for the normalization. Defaults to 1e-12 + */ + using NormalizationFunction = void(ITensor *input, ITensor *output, float gamma, float beta, float epsilon, const Window &window); + + NormalizationFunction *_func; + ITensor *_input; + ITensor *_output; + float _gamma; + float _beta; + float _epsilon; + bool _use_mixed_precision{ true }; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEIntegralImageKernel.cpp b/src/core/NEON/kernels/NEIntegralImageKernel.cpp index 5fc6ca65e3..6ee97eea30 100644 --- a/src/core/NEON/kernels/NEIntegralImageKernel.cpp +++ b/src/core/NEON/kernels/NEIntegralImageKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEIntegralImageKernel.h" +#include "src/core/NEON/kernels/NEIntegralImageKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEIntegralImageKernel.h b/src/core/NEON/kernels/NEIntegralImageKernel.h new file mode 100644 index 0000000000..8d92504317 --- /dev/null +++ b/src/core/NEON/kernels/NEIntegralImageKernel.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H +#define ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Kernel to compute the integral image of an image */ +class NEIntegralImageKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEIntegralImageKernel"; + } + /** Default constructor */ + NEIntegralImageKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEIntegralImageKernel(const NEIntegralImageKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEIntegralImageKernel &operator=(const NEIntegralImageKernel &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEIntegralImageKernel(NEIntegralImageKernel &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEIntegralImageKernel &operator=(NEIntegralImageKernel &&) = delete; + /** Default destructor */ + ~NEIntegralImageKernel() = default; + /** Set the source and destination of the kernel + * + * @param[in] input Source tensor. Data type supported: U8 + * @param[out] output Destination tensor. 
Data type supported: U32 + */ + void configure(const ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + bool is_parallelisable() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp index a216981f0f..dae5b57fec 100644 --- a/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp +++ b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h" +#include "src/core/NEON/kernels/NEL2NormalizeLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEL2NormalizeLayerKernel.h b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.h new file mode 100644 index 0000000000..af3ad3403e --- /dev/null +++ b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H +#define ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for performing an L2 normalization on a given axis, given its square sum along this axis */ +class NEL2NormalizeLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEL2NormalizeLayerKernel"; + } + /** Default constructor */ + NEL2NormalizeLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEL2NormalizeLayerKernel(const NEL2NormalizeLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEL2NormalizeLayerKernel &operator=(const NEL2NormalizeLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEL2NormalizeLayerKernel(NEL2NormalizeLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEL2NormalizeLayerKernel &operator=(NEL2NormalizeLayerKernel &&) = default; + /** Default destructor */ + ~NEL2NormalizeLayerKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16/F32. + * @param[in] sum Sum values tensor. Data types supported: same as @p input. + * Sum will have the same number of dimensions as input. + * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. 
+ * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 + * @param[in] epsilon Lower bound value for the normalization. + */ + void configure(const ITensor *input, const ITensor *sum, ITensor *output, int axis, float epsilon); + + /** Static function to check if given info will lead to a valid configuration of @ref NEL2NormalizeLayerKernel. + * + * @param[in] input Source tensor info. Data types supported: F16/F32. + * @param[in] sum Sum values tensor info. Data types supported: same as @p input. + * Sum will have the same number of dimensions as input. + * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 + * @param[in] epsilon Lower bound value for the normalization. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + const ITensor *_sum; + ITensor *_output; + unsigned int _actual_axis; + float _epsilon; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NELKTrackerKernel.cpp b/src/core/NEON/kernels/NELKTrackerKernel.cpp index 6567a8d206..442f001102 100644 --- a/src/core/NEON/kernels/NELKTrackerKernel.cpp +++ b/src/core/NEON/kernels/NELKTrackerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" +#include "src/core/NEON/kernels/NELKTrackerKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NELKTrackerKernel.h b/src/core/NEON/kernels/NELKTrackerKernel.h new file mode 100644 index 0000000000..c24166c042 --- /dev/null +++ b/src/core/NEON/kernels/NELKTrackerKernel.h @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_LKTRACKERKERNEL_H +#define ARM_COMPUTE_LKTRACKERKERNEL_H + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +#include +#include +#include +#include + +namespace arm_compute +{ +class ITensor; + +/** Interface for NEON Array of Internal Key Points. 
*/ +using INELKInternalKeypointArray = IArray; + +/** Interface for the Lucas-Kanade tracker kernel */ +class NELKTrackerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NELKTrackerKernel"; + } + /** Default constructor */ + NELKTrackerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELKTrackerKernel(const NELKTrackerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELKTrackerKernel &operator=(const NELKTrackerKernel &) = delete; + /** Allow instances of this class to be moved */ + NELKTrackerKernel(NELKTrackerKernel &&) = default; + /** Allow instances of this class to be moved */ + NELKTrackerKernel &operator=(NELKTrackerKernel &&) = default; + /** Default destructor */ + ~NELKTrackerKernel() = default; + + /** Initialise the kernel input and output + * + * @param[in] input_old Pointer to the input old tensor. Data type supported: U8 + * @param[in] input_new Pointer to the input new tensor. Data type supported: U8 + * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data type supported: S16 + * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. Data type supported: S16 + * @param[in] old_points Pointer to the IKeyPointArray storing old key points + * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points + * @param[out] new_points Pointer to the IKeyPointArray storing new key points + * @param[in, out] old_points_internal Pointer to the array of NELKInternalKeypoint for old points + * @param[out] new_points_internal Pointer to the array of NELKInternalKeypoint for new points + * @param[in] termination The criteria to terminate the search of each keypoint. 
+ * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used + * @param[in] epsilon The error for terminating the algorithm + * @param[in] num_iterations The maximum number of iterations before terminating the algorithm + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] level The pyramid level + * @param[in] num_levels The number of pyramid levels + * @param[in] pyramid_scale Scale factor used for generating the pyramid + */ + void configure(const ITensor *input_old, const ITensor *input_new, const ITensor *old_scharr_gx, const ITensor *old_scharr_gy, + const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates, IKeyPointArray *new_points, + INELKInternalKeypointArray *old_points_internal, INELKInternalKeypointArray *new_points_internal, + Termination termination, bool use_initial_estimate, float epsilon, unsigned int num_iterations, size_t window_dimension, + size_t level, size_t num_levels, float pyramid_scale); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + /** Initialise the array of keypoints in the provided range + * + * @param[in] start Index of first element in the keypoints array to be initialised + * @param[in] end Index after last element in the keypoints array to be initialised + */ + void init_keypoints(int start, int end); + /** Compute the structure tensor A^T * A based on the scharr gradients I_x and I_y + * + * @param[in] keypoint Keypoint for which gradients are computed + * @param[out] bilinear_ix Intermediate interpolated data for X gradient + * @param[out] bilinear_iy Intermediate interpolated data for Y gradient + * + * @return Values A11, A12, A22 + */ + std::tuple compute_spatial_gradient_matrix(const NELKInternalKeypoint &keypoint, int32_t *bilinear_ix, int32_t *bilinear_iy); + /** Compute the vector A^T * b, 
i.e. -sum(I_d * I_t) for d in {x,y} + * + * @param[in] old_keypoint Old keypoint for which gradient is computed + * @param[in] new_keypoint New keypoint for which gradient is computed + * @param[in] bilinear_ix Intermediate interpolated data for X gradient + * @param[in] bilinear_iy Intermediate interpolated data for Y gradient + * + * @return Values b1, b2 + */ + std::pair compute_image_mismatch_vector(const NELKInternalKeypoint &old_keypoint, const NELKInternalKeypoint &new_keypoint, const int32_t *bilinear_ix, const int32_t *bilinear_iy); + + const ITensor *_input_old; + const ITensor *_input_new; + const ITensor *_old_scharr_gx; + const ITensor *_old_scharr_gy; + IKeyPointArray *_new_points; + const IKeyPointArray *_new_points_estimates; + const IKeyPointArray *_old_points; + INELKInternalKeypointArray *_old_points_internal; + INELKInternalKeypointArray *_new_points_internal; + Termination _termination; + bool _use_initial_estimate; + float _pyramid_scale; + float _epsilon; + unsigned int _num_iterations; + int _window_dimension; + unsigned int _level; + unsigned int _num_levels; + ValidRegion _valid_region; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_LKTRACKERKERNEL_H */ diff --git a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp index b8e6a6d763..f11694dee4 100644 --- a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h new file mode 100644 index 0000000000..72093b4bb7 --- /dev/null +++ b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H +#define ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to multiply each row of the first tensor with the low 2 dimensions of the second tensor. */ +class NELocallyConnectedMatrixMultiplyKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NELocallyConnectedMatrixMultiplyKernel"; + } + /** Default constructor */ + NELocallyConnectedMatrixMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELocallyConnectedMatrixMultiplyKernel(const NELocallyConnectedMatrixMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELocallyConnectedMatrixMultiplyKernel &operator=(const NELocallyConnectedMatrixMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + NELocallyConnectedMatrixMultiplyKernel(NELocallyConnectedMatrixMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + NELocallyConnectedMatrixMultiplyKernel &operator=(NELocallyConnectedMatrixMultiplyKernel &&) = default; + /** Default destructor */ + ~NELocallyConnectedMatrixMultiplyKernel() = default; + /** Initialise the kernel's input and output + * + * @param[in] input0 First input tensor. Data types supported: F16, F32 + * @param[in] input1 Second input tensor containing the Matrix B. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + */ + void configure(const ITensor *input0, const ITensor *input1, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NELocallyConnectedMatrixMultiplyKernel + * + * @param[in] input0 First input tensor info. 
Data types supported: F16, F32 + * @param[in] input1 Second input tensor info. Data type supported: same as @p input0 + * @param[in] output Output tensor info. Data type supported: same as @p input0 + * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input0; + const ITensor *_input1; + ITensor *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H */ diff --git a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp index 8d82e1abd6..205f67823d 100644 --- a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp +++ b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h" +#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEMagnitudePhaseKernel.h b/src/core/NEON/kernels/NEMagnitudePhaseKernel.h new file mode 100644 index 0000000000..3803d05ce9 --- /dev/null +++ b/src/core/NEON/kernels/NEMagnitudePhaseKernel.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2016-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H +#define ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Template interface for the kernel to compute magnitude and phase */ +template +class NEMagnitudePhaseKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEMagnitudePhaseKernel"; + } + /** Default constructor */ + NEMagnitudePhaseKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMagnitudePhaseKernel(const NEMagnitudePhaseKernel &) = delete; + /** Default move constructor */ + NEMagnitudePhaseKernel(NEMagnitudePhaseKernel &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMagnitudePhaseKernel &operator=(const NEMagnitudePhaseKernel &) = delete; + /** Default move assignment operator */ + NEMagnitudePhaseKernel &operator=(NEMagnitudePhaseKernel &&) = default; + /** Destructor */ + ~NEMagnitudePhaseKernel() = default; + + /** Initialise the kernel's input, output. + * + * @note At least one of @p magnitude or @p phase must be set + * + * @param[in] gx Gradient X tensor. Data type supported: S16. + * @param[in] gy Gradient Y tensor. Data type supported: S16. + * @param[out] magnitude (Optional) The output tensor - Magnitude. Data type supported: S16. + * @param[out] phase (Optional) The output tensor - Phase. Data type supported: U8. 
+ */ + void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Function to perform magnitude on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void magnitude(const Window &window); + /** Function to perform phase on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void phase(const Window &window); + /** Function to perform magnitude and phase on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void magnitude_phase(const Window &window); + +private: + /** Common signature for all the specialised MagnitudePhase functions + * + * @param[in] window Region on which to execute the kernel. + */ + using MagnitudePhaseFunctionPtr = void (NEMagnitudePhaseKernel::*)(const Window &window); + /** MagnitudePhase function to use for the particular formats passed to configure() */ + MagnitudePhaseFunctionPtr _func; + const ITensor *_gx; /**< Input gradient X */ + const ITensor *_gy; /**< Input gradient Y */ + ITensor *_magnitude; /**< Output - Magnitude */ + ITensor *_phase; /**< Output - Phase */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp b/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp index 87caf00477..761fa15238 100644 --- a/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h" +#include "src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" diff --git a/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h b/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h new file mode 100644 index 0000000000..8cdfe2b953 --- /dev/null +++ b/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEMAXUNPOOLINGLAYERKERNEL_H +#define ARM_COMPUTE_NEMAXUNPOOLINGLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the max unpooling layer kernel */ +class NEMaxUnpoolingLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEMaxUnpoolingLayerKernel"; + } + /** Default constructor */ + NEMaxUnpoolingLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMaxUnpoolingLayerKernel(const NEMaxUnpoolingLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMaxUnpoolingLayerKernel &operator=(const NEMaxUnpoolingLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEMaxUnpoolingLayerKernel(NEMaxUnpoolingLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEMaxUnpoolingLayerKernel &operator=(NEMaxUnpoolingLayerKernel &&) = default; + /** Default destructor */ + ~NEMaxUnpoolingLayerKernel() = default; + /** Set the input and output tensors. + * + * @note Output shape must be equal to the shape of the original input to pool. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] indices Tensor containing the offset to store the input elements in the output tensor. + * @ref NEPoolingLayerKernel with indices should precede this function in order to + * properly reconstruct the output tensor. + * The tensor shape of this tensor has to be equal to the input tensor shape. Data type supported: U32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ */ + void configure(const ITensor *input, const ITensor *indices, ITensor *output, const PoolingLayerInfo &pool_info); + /** Static function to check if given info will lead to a valid configuration of @ref NEMaxUnpoolingLayerKernel + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] indices Tensor info of the indices of the maximal values. Data type supported: U32. + * @param[in] output Destination tensor info. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Function to perform 2x2 max unpooling on the given window. + * + * NOTE(review): the previous wording described pooling and index computation; presumably this stores input elements at the output offsets held in the indices tensor - confirm against the .cpp. + * NOTE(review): the template parameter list after `template` is missing in this copy of the patch. + * + * @param[in] window_input Input region on which to execute the kernel. + */ + template + void unpooling2(const Window &window_input); + + using UnpoolingFunction = void (NEMaxUnpoolingLayerKernel::*)(const Window &window); + +private: + UnpoolingFunction _func; + const ITensor *_input; + ITensor *_output; + const ITensor *_indices; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEMAXUNPOOLINGLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp index c4e036a8b9..a6bb9f2ef7 100644 --- a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp +++ b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h" +#include "src/core/NEON/kernels/NEMeanStdDevKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEMeanStdDevKernel.h b/src/core/NEON/kernels/NEMeanStdDevKernel.h new file mode 100644 index 0000000000..e694f3824d --- /dev/null +++ b/src/core/NEON/kernels/NEMeanStdDevKernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEMEANSTDDEVKERNEL_H +#define ARM_COMPUTE_NEMEANSTDDEVKERNEL_H + +#include "src/core/NEON/INEKernel.h" +#include "support/Mutex.h" + +#include + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** Interface for the kernel to calculate mean and standard deviation of input image pixels. 
*/ +class NEMeanStdDevKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEMeanStdDevKernel"; + } + /** Default constructor */ + NEMeanStdDevKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDevKernel(const NEMeanStdDevKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDevKernel &operator=(const NEMeanStdDevKernel &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMeanStdDevKernel(NEMeanStdDevKernel &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMeanStdDevKernel &operator=(NEMeanStdDevKernel &&) = delete; + /** Default destructor */ + ~NEMeanStdDevKernel() = default; + + /** Initialise the kernel's input and outputs. + * + * @param[in] input Input image. Data type supported: U8. + * @param[out] mean Output average pixel value of the input image. + * @param[out] global_sum Keeps global sum of pixel values. + * @param[out] stddev (Optional) Output standard deviation of pixel values. + * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values.
+ */ + void configure(const IImage *input, float *mean, uint64_t *global_sum, float *stddev = nullptr, uint64_t *global_sum_squared = nullptr); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + + BorderSize border_size() const override; + +private: + const IImage *_input; + float *_mean; + float *_stddev; + uint64_t *_global_sum; + uint64_t *_global_sum_squared; + arm_compute::Mutex _mtx; + BorderSize _border_size; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEMEANSTDDEVKERNEL_H */ diff --git a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp index 8ee9ff6f40..6a41e3a161 100644 --- a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h" +#include "src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h new file mode 100644 index 0000000000..59d073ada5 --- /dev/null +++ b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2019-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H +#define ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#include +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to normalize the input 2D tensor across the first dimension with respect to mean and standard deviation of the same dimension. 
*/ +class NEMeanStdDevNormalizationKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEMeanStdDevNormalizationKernel"; + } + /** Default constructor */ + NEMeanStdDevNormalizationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDevNormalizationKernel(const NEMeanStdDevNormalizationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDevNormalizationKernel &operator=(const NEMeanStdDevNormalizationKernel &) = delete; + /** Allow instances of this class to be moved */ + NEMeanStdDevNormalizationKernel(NEMeanStdDevNormalizationKernel &&) = default; + /** Allow instances of this class to be moved */ + NEMeanStdDevNormalizationKernel &operator=(NEMeanStdDevNormalizationKernel &&) = default; + /** Default destructor */ + ~NEMeanStdDevNormalizationKernel() = default; + /** Initialise the kernel's input and outputs. + * + * @note If the output tensor is a nullptr, the normalization will be performed in-place. + * + * @param[in, out] input Source tensor with 2 dimensions. In case of @p output tensor = nullptr, + * this tensor will store the result of the normalization. Data types supported: F16/F32. + * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input. + * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. + */ + void configure(ITensor *input, ITensor *output = nullptr, float epsilon = 1e-8f); + /** Static function to check if given info will lead to a valid configuration of @ref NEMeanStdDevNormalizationKernel + * + * @param[in] input Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr, + * this tensor will store the result of the normalization. Data types supported: F16/F32.
+ * @param[in] output (Optional) Destination tensor info. It can be nullptr in case of in-place computation. Data type supported: same as @p input. + * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output = nullptr, float epsilon = 1e-8f); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Normalizes the input with respect to mean and standard deviation. + * + * NOTE(review): the template parameter list after `template` is missing in this copy of the patch. + * + * @param[in] window Region on which to execute the kernel. + */ + template + void mean_stddev_normalization(const Window &window); + + ITensor *_input; + ITensor *_output; + float _epsilon; + + using MeanStdDevNormFunction = void (NEMeanStdDevNormalizationKernel::*)(const Window &window); /**< Pointer-to-member signature matching mean_stddev_normalization() */ + + MeanStdDevNormFunction _func; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NEMedian3x3Kernel.cpp b/src/core/NEON/kernels/NEMedian3x3Kernel.cpp index 86fcc30e91..0160edc650 100644 --- a/src/core/NEON/kernels/NEMedian3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEMedian3x3Kernel.cpp @@ -21,14 +21,14 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h" +#include "src/core/NEON/kernels/NEMedian3x3Kernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEMedian3x3Kernel.h b/src/core/NEON/kernels/NEMedian3x3Kernel.h new file mode 100644 index 0000000000..b9e28b3053 --- /dev/null +++ b/src/core/NEON/kernels/NEMedian3x3Kernel.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEMEDIAN3x3KERNEL_H +#define ARM_COMPUTE_NEMEDIAN3x3KERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Kernel to perform a 3x3 median filter on a tensor */ +class NEMedian3x3Kernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEMedian3x3Kernel"; + } + /** Default constructor */ + NEMedian3x3Kernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMedian3x3Kernel(const NEMedian3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMedian3x3Kernel &operator=(const NEMedian3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + NEMedian3x3Kernel(NEMedian3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + NEMedian3x3Kernel &operator=(NEMedian3x3Kernel &&) = default; + /** Default destructor */ + ~NEMedian3x3Kernel() = default; + /** Set the source, destination and border mode of the kernel. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEMEDIAN3x3KERNEL_H */ diff --git a/src/core/NEON/kernels/NEMemsetKernel.cpp b/src/core/NEON/kernels/NEMemsetKernel.cpp index fd427cc8c5..a8dfda3775 100644 --- a/src/core/NEON/kernels/NEMemsetKernel.cpp +++ b/src/core/NEON/kernels/NEMemsetKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" +#include "src/core/NEON/kernels/NEMemsetKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEMemsetKernel.h b/src/core/NEON/kernels/NEMemsetKernel.h new file mode 100644 index 0000000000..a720e60251 --- /dev/null +++ b/src/core/NEON/kernels/NEMemsetKernel.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEMEMSETKERNEL_H +#define ARM_COMPUTE_NEMEMSETKERNEL_H + +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for filling the planes of a tensor */ +class NEMemsetKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEMemsetKernel"; + } + /** Default constructor */ + NEMemsetKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMemsetKernel(const NEMemsetKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMemsetKernel &operator=(const NEMemsetKernel &) = delete; + /** Allow instances of this class to be moved */ + NEMemsetKernel(NEMemsetKernel &&) = default; + /** Allow instances of this class to be moved */ + NEMemsetKernel &operator=(NEMemsetKernel &&) = default; + /** Default destructor */ + ~NEMemsetKernel() = default; + /** Initialise the kernel's tensor and filling value. + * + * @param[in,out] tensor Input tensor to fill. Supported data types: All. + * @param[in] constant_value The value used to fill the planes of the tensor. + */ + void configure(ITensor *tensor, const PixelValue &constant_value); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + ITensor *_tensor; /**< Tensor to fill - presumably the one passed to configure(); TODO confirm in the .cpp */ + PixelValue _constant_value; /**< Fill value - presumably the one passed to configure(); TODO confirm in the .cpp */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEMEMSETKERNEL_H */ diff --git a/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp b/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp index f675c391ed..92f6b4a42e 100644 --- a/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp +++ b/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h" +#include "src/core/NEON/kernels/NEMinMaxLayerKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NEMinMaxLayerKernel.h b/src/core/NEON/kernels/NEMinMaxLayerKernel.h new file mode 100644 index 0000000000..b4852ad9f2 --- /dev/null +++ b/src/core/NEON/kernels/NEMinMaxLayerKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_NEMINMAXLAYERKERNEL_H +#define ARM_COMPUTE_NEMINMAXLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" +#include "support/Mutex.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform min max search on a 3D tensor. 
*/ +class NEMinMaxLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEMinMaxLayerKernel"; + } + /** Default constructor */ + NEMinMaxLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxLayerKernel(const NEMinMaxLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxLayerKernel &operator=(const NEMinMaxLayerKernel &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMinMaxLayerKernel(NEMinMaxLayerKernel &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMinMaxLayerKernel &operator=(NEMinMaxLayerKernel &&) = delete; + /** Default destructor */ + ~NEMinMaxLayerKernel() = default; + + /** Initialise the kernel's input and outputs. + * + * @note output[0] = minimum + * @note output[1] = maximum + * + * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data type supported: F32. + * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum value for each 3D input tensor. + * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32. + */ + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEMinMaxLayerKernel + * + * @param[in] input Input tensor info. Data types supported: F32. + * @param[in] output Output tensor info with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor. + * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32.
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + /** Resets global minimum and maximum. */ + void reset(); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + void update_min_max(float *out_ptr, float min, float max); + const ITensor *_input; + ITensor *_output; + arm_compute::Mutex _mtx; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEMINMAXLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp b/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp index e1691dc8ff..402e6f1811 100644 --- a/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp +++ b/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h" +#include "src/core/NEON/kernels/NEMinMaxLocationKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NEMinMaxLocationKernel.h b/src/core/NEON/kernels/NEMinMaxLocationKernel.h new file mode 100644 index 0000000000..a24666096f --- /dev/null +++ b/src/core/NEON/kernels/NEMinMaxLocationKernel.h @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H +#define ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H + +#include "arm_compute/core/IArray.h" +#include "src/core/NEON/INEKernel.h" +#include "support/Mutex.h" + +#include + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** Interface for the kernel to perform min max search on an image. */ +class NEMinMaxKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEMinMaxKernel"; + } + /** Default constructor */ + NEMinMaxKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxKernel(const NEMinMaxKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxKernel &operator=(const NEMinMaxKernel &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMinMaxKernel(NEMinMaxKernel &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMinMaxKernel &operator=(NEMinMaxKernel &&) = delete; + /** Default destructor */ + ~NEMinMaxKernel() = default; + + /** Initialise the kernel's input and outputs. + * + * @param[in] input Input Image. Data types supported: U8/S16/F32. + * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. + * @param[out] max Maximum value of image.
Data types supported: S32 if input type is U8/S16, F32 if input type is F32. + */ + void configure(const IImage *input, void *min, void *max); + /** Resets global minimum and maximum. */ + void reset(); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Performs the min/max algorithm on U8 images on a given window. + * + * @param[in] win The window to run the algorithm on. + */ + void minmax_U8(Window win); + /** Performs the min/max algorithm on S16 images on a given window. + * + * @param[in] win The window to run the algorithm on. + */ + void minmax_S16(Window win); + /** Performs the min/max algorithm on F32 images on a given window. + * + * @param[in] win The window to run the algorithm on. + */ + void minmax_F32(Window win); + /** Common signature for all the specialised MinMax functions + * + * @param[in] window Region on which to execute the kernel. + */ + using MinMaxFunction = void (NEMinMaxKernel::*)(Window window); + /** MinMax function to use for the particular image types passed to configure() */ + MinMaxFunction _func; + /** Helper to update min/max values + * + * NOTE(review): the template parameter list after `template` is missing in this copy of the patch. + */ + template + void update_min_max(T min, T max); + + const IImage *_input; /**< Input image. */ + void *_min; /**< Minimum value. */ + void *_max; /**< Maximum value. */ + arm_compute::Mutex _mtx; /**< Mutex used for result reduction. */ +}; + +/** Interface for the kernel to find min max locations of an image.
*/ +class NEMinMaxLocationKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEMinMaxLocationKernel"; + } + /** Default constructor */ + NEMinMaxLocationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxLocationKernel(const NEMinMaxLocationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxLocationKernel &operator=(const NEMinMaxLocationKernel &) = delete; + /** Allow instances of this class to be moved */ + NEMinMaxLocationKernel(NEMinMaxLocationKernel &&) = default; + /** Allow instances of this class to be moved */ + NEMinMaxLocationKernel &operator=(NEMinMaxLocationKernel &&) = default; + /** Default destructor */ + ~NEMinMaxLocationKernel() = default; + + /** Initialise the kernel's input and outputs. + * + * @param[in] input Input Image. Data types supported: U8/S16/F32. + * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. + * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. + * @param[out] min_loc Array of minimum value locations. + * @param[out] max_loc Array of maximum value locations. + * @param[out] min_count Number of minimum value encounters. + * @param[out] max_count Number of maximum value encounters. + */ + void configure(const IImage *input, void *min, void *max, + ICoordinates2DArray *min_loc = nullptr, ICoordinates2DArray *max_loc = nullptr, + uint32_t *min_count = nullptr, uint32_t *max_count = nullptr); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + bool is_parallelisable() const override; + +private: + /** Performs the min/max location algorithm on T type images on a given window. + * + * @param win The window to run the algorithm on. 
+ */ + template + void minmax_loc(const Window &win); + /** Common signature for all the specialised MinMaxLoc functions + * + * @param[in] window Region on which to execute the kernel. + */ + using MinMaxLocFunction = void (NEMinMaxLocationKernel::*)(const Window &window); + /** MinMaxLoc function to use for the particular image types passed to configure() */ + MinMaxLocFunction _func; + /** Helper to create a function pointer table for the parameterized MinMaxLocation functions. */ + template + struct create_func_table; + + const IImage *_input; /**< Input image. */ + void *_min; /**< Minimum value. */ + void *_max; /**< Maximum value. */ + uint32_t *_min_count; /**< Count of minimum value encounters. */ + uint32_t *_max_count; /**< Count of maximum value encounters. */ + ICoordinates2DArray *_min_loc; /**< Locations of minimum values. */ + ICoordinates2DArray *_max_loc; /**< Locations of maximum values. */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NENonLinearFilterKernel.cpp b/src/core/NEON/kernels/NENonLinearFilterKernel.cpp index 31919ead03..58c0acd404 100644 --- a/src/core/NEON/kernels/NENonLinearFilterKernel.cpp +++ b/src/core/NEON/kernels/NENonLinearFilterKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h" +#include "src/core/NEON/kernels/NENonLinearFilterKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NENonLinearFilterKernel.h b/src/core/NEON/kernels/NENonLinearFilterKernel.h new file mode 100644 index 0000000000..3cef12e8ec --- /dev/null +++ b/src/core/NEON/kernels/NENonLinearFilterKernel.h @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NENONLINEARFILTERKERNEL_H +#define ARM_COMPUTE_NENONLINEARFILTERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to apply a non-linear filter */ +class NENonLinearFilterKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NENonLinearFilterKernel"; + } + /** Default constructor */ + NENonLinearFilterKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENonLinearFilterKernel(NENonLinearFilterKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &) = delete; + /** Allow instances of this class to be moved */ + NENonLinearFilterKernel(NENonLinearFilterKernel &&) = default; + /** Allow instances of this class to be moved */ + NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &&) = default; + /** Default destructor */ + ~NENonLinearFilterKernel() = default; + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8 + * @param[out] output Destination tensor. Data type supported: U8 + * @param[in] function Non linear function to perform + * @param[in] mask_size Mask size. Supported sizes: 3, 5 + * @param[in] pattern Mask pattern + * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + /** Fill mask with the corresponding given pattern. + * + * @param[in,out] mask Mask to be filled according to pattern + * @param[in] cols Columns (width) of mask + * @param[in] rows Rows (height) of mask + * @param[in] pattern Pattern to fill the mask according to + */ + void fill_mask(uint8_t *mask, int cols, int rows, MatrixPattern pattern); + /** Apply a median filter when given mask pattern is defined as box. + * + * @param[in] win Window to apply the filter on. + */ + template + void median_filter_box(const Window &win); + /** Apply a min filter when given mask pattern is defined as box. + * + * @param[in] win Window to apply the filter on. + */ + template + void min_filter_box(const Window &win); + /** Apply a max filter when given mask pattern is defined as box. + * + * @param[in] win Window to apply the filter on. + */ + template + void max_filter_box(const Window &win); + /** Apply a median filter when given mask pattern is defined as cross. + * + * @param[in] win Window to apply the filter on. + */ + template + void median_filter_cross(const Window &win); + /** Apply a min filter when given mask pattern is defined as cross. + * + * @param[in] win Window to apply the filter on. + */ + template + void min_filter_cross(const Window &win); + /** Apply a max filter when given mask pattern is defined as cross. + * + * @param[in] win Window to apply the filter on. + */ + template + void max_filter_cross(const Window &win); + /** Apply a median filter when given mask pattern is defined as disk. + * + * @param[in] win Window to apply the filter on. 
+ */ + template + void median_filter_disk(const Window &win); + /** Apply a min filter when given mask pattern is defined as disk. + * + * @param[in] win Window to apply the filter on. + */ + template + void min_filter_disk(const Window &win); + /** Apply a max filter when given mask pattern is defined as disk. + * + * @param[in] win Window to apply the filter on. + */ + template + void max_filter_disk(const Window &win); + /** Apply a non-linear filter when given mask has user-defined pattern. + * + * @param[in] win Window to apply the filter on. + */ + template + void non_linear_filter_generic(const Window &win); + +private: + unsigned int _border_width; + const ITensor *_input; + ITensor *_output; + const uint8_t *_mask; + MatrixPattern _pattern; + NonLinearFilterFunction _function; + unsigned int _func_idx; + BorderSize _border_size; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NENONLINEARFILTERKERNEL_H */ diff --git a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp index 9566ced768..9f5dfcdcdb 100644 --- a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp +++ b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" +#include "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h new file mode 100644 index 0000000000..d32dfecfeb --- /dev/null +++ b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H +#define ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H + +#include "src/core/NEON/INEKernel.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Interface to perform Non-Maxima suppression over a 3x3 window using NEON + * + * @note Used by @ref NEFastCorners and @ref NEHarrisCorners + */ +class NENonMaximaSuppression3x3Kernel : public INEKernel +{ +public: + const char *name() const override + { + return "NENonMaximaSuppression3x3Kernel"; + } + /** Default constructor */ + NENonMaximaSuppression3x3Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENonMaximaSuppression3x3Kernel(const NENonMaximaSuppression3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENonMaximaSuppression3x3Kernel &operator=(const NENonMaximaSuppression3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + NENonMaximaSuppression3x3Kernel(NENonMaximaSuppression3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + NENonMaximaSuppression3x3Kernel &operator=(NENonMaximaSuppression3x3Kernel &&) = default; + /** Default destructor */ + ~NENonMaximaSuppression3x3Kernel() = default; + + /** Initialise the kernel's sources, destinations and border mode. + * + * @param[in] input Source tensor. Data types supported: U8/F32 + * @param[out] output Destination tensor. Data types supported: same as @p input + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +protected: + /** Common signature for all the specialised non-maxima suppression 3x3 functions + * + * @param[in] input_ptr Pointer to the input tensor. + * @param[out] output_ptr Pointer to the output tensor. + * @param[in] input_stride Stride of the input tensor. + */ + using NonMaxSuppr3x3Function = void(const void *__restrict input_ptr, void *__restrict output_ptr, const uint32_t input_stride); + + NonMaxSuppr3x3Function *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */ + const ITensor *_input; /**< Source tensor */ + ITensor *_output; /**< Destination tensor */ +}; + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32 + */ +class NENonMaximaSuppression3x3FP16Kernel : public NENonMaximaSuppression3x3Kernel +{ +public: + const char *name() const override + { + return "NENonMaximaSuppression3x3FP16Kernel"; + } + /** Initialise the kernel's sources, destinations and border mode. + * + * @param[in] input Source tensor. Data types supported: U8/F32. + * @param[out] output Destination tensor. Data types supported: same as @p input + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); +}; +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +/** Without __ARM_FEATURE_FP16_VECTOR_ARITHMETIC the FP16 kernel name is a plain alias of @ref NENonMaximaSuppression3x3Kernel */ +using NENonMaximaSuppression3x3FP16Kernel = NENonMaximaSuppression3x3Kernel; +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +} // namespace arm_compute +#endif /* ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H */ diff --git a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp index 1b72a3e277..27464d5b42 100644 --- a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h" +#include "src/core/NEON/kernels/NENormalizationLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NENormalizationLayerKernel.h b/src/core/NEON/kernels/NENormalizationLayerKernel.h new file mode 100644 index 0000000000..53a06b9ed9 --- /dev/null +++ b/src/core/NEON/kernels/NENormalizationLayerKernel.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2017-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H +#define ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the normalization layer kernel. + */ +class NENormalizationLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NENormalizationLayerKernel"; + } + /** Default constructor */ + NENormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENormalizationLayerKernel(const NENormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENormalizationLayerKernel &operator=(const NENormalizationLayerKernel &) = delete; + /** Default Move Constructor.
*/ + NENormalizationLayerKernel(NENormalizationLayerKernel &&) = default; + /** Default move assignment operator */ + NENormalizationLayerKernel &operator=(NENormalizationLayerKernel &&) = default; + /** Default destructor */ + ~NENormalizationLayerKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: FP16/F32. Data layouts supported: NCHW/NHWC. + * @param[in] input_squared Source tensor in which each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM]. + * Data type and layout supported: same as @p input. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type and layout supported: same as @p input. + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. + */ + void configure(const ITensor *input, const ITensor *input_squared, ITensor *output, NormalizationLayerInfo norm_info); + /** Static function to check if given info will lead to a valid configuration of @ref NENormalizationLayerKernel + * + * @param[in] input Source tensor info. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: FP16/F32. Data layouts supported: NCHW/NHWC. + * @param[in] input_squared Source tensor info in which each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM]. + * Data type and layout supported: same as @p input. + * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type and layout supported: same as @p input. + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *input_squared, const ITensorInfo *output, NormalizationLayerInfo norm_info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Function to perform normalization depending on the given template + * dimension. The second template parameter specifies whether the + * normalization has to be 1D or 2D. + * + * @note The only supported normalizations are: + * - 1D over X or Z + * - 2D over X and Y + * + * @param[in] window Region on which to execute the kernel. + */ + template + void normalize_float(const Window &window); + + /** Common signature for all the specialised normalization functions + * + * @param[in] window Region on which to execute the kernel. + */ + using NormalizationFunction = void (NENormalizationLayerKernel::*)(const Window &window); + +private: + NormalizationFunction _func; + const ITensor *_input; + const ITensor *_input_squared; + ITensor *_output; + NormalizationLayerInfo _norm_info; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEPadLayerKernel.cpp b/src/core/NEON/kernels/NEPadLayerKernel.cpp index ca9c5419e0..200fe2ce54 100644 --- a/src/core/NEON/kernels/NEPadLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPadLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h" +#include "src/core/NEON/kernels/NEPadLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEPadLayerKernel.h b/src/core/NEON/kernels/NEPadLayerKernel.h new file mode 100644 index 0000000000..ec4bdffdcd --- /dev/null +++ b/src/core/NEON/kernels/NEPadLayerKernel.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2019-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEPADLAYERKERNEL_H +#define ARM_COMPUTE_NEPADLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to add padding to a tensor + * + * Add padding given padding information + */ +class NEPadLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEPadLayerKernel"; + } + /** Default constructor */ + NEPadLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPadLayerKernel(const NEPadLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPadLayerKernel &operator=(const NEPadLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEPadLayerKernel(NEPadLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEPadLayerKernel &operator=(NEPadLayerKernel &&) = default; + /** Default destructor */ + ~NEPadLayerKernel() = default; + + /** Initialize the kernel + * + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: same as @p input + * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] + * specifies the front and the end padding in the i-th dimension. + * @param[in] constant_value (Optional) Constant value to be used for the padding + * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT. + * Only CONSTANT padding mode is currently supported + */ + void configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT); + /** Static function to check if given info will lead to a valid configuration of @ref NEPadLayerKernel. + * + * @param[in] input Source tensor info. Data types supported: All.
+ * @param[in] output Output tensor info. Data type supported: same as @p input + * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] + * specifies the front and the end padding in the i-th dimension. + * @param[in] constant_value (Optional) Constant value to be used for the padding + * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT. + * Only CONSTANT padding mode is currently supported + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Template function to run the padding function with constant padding + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void run_pad_constant(const Window &window); + + /** Function to run the padding function with constant padding for 3D input and 1D, 2D, 3D padding + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + void run_pad_constant_uint8_3Dinput_3Dpad(const Window &window); + + /** Common signature for all the specialised pad functions + * + * @param[in] window Region on which to execute the kernel.
+ */ + using PadFunctionPtr = void (NEPadLayerKernel::*)(const Window &window); + + PadFunctionPtr _func; + const ITensor *_input; + ITensor *_output; + PaddingList _padding; + PixelValue _constant_value; + PaddingMode _mode; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEPADLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEPermuteKernel.cpp b/src/core/NEON/kernels/NEPermuteKernel.cpp index eab11ebfff..6a9f5d36ef 100644 --- a/src/core/NEON/kernels/NEPermuteKernel.cpp +++ b/src/core/NEON/kernels/NEPermuteKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEPermuteKernel.h" +#include "src/core/NEON/kernels/NEPermuteKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" @@ -123,7 +123,7 @@ void NEPermuteKernel::run_permute(const Window &window) // Input window Window window_in = window; - // we only support these two configs in arm_compute/core/NEON/kernels/convolution/common/shims.hpp, for all others + // we only support these two configs in src/core/NEON/kernels/convolution/common/shims.hpp, for all others // we have to fall back to C++ if((input_layout == DataLayout::NCHW && _perm == PermutationVector{ 2U, 0U, 1U }) || (input_layout == DataLayout::NHWC && _perm == PermutationVector{ 1U, 2U, 0U })) { diff --git a/src/core/NEON/kernels/NEPermuteKernel.h b/src/core/NEON/kernels/NEPermuteKernel.h new file mode 100644 index 0000000000..80187de9eb --- /dev/null +++ b/src/core/NEON/kernels/NEPermuteKernel.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2018-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEPERMUTEKERNEL_H +#define ARM_COMPUTE_NEPERMUTEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** NEON kernel to perform tensor permutation.
+ * + * Permutes a tensor according to a given permutation vector + */ +class NEPermuteKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEPermuteKernel"; + } + /** Default constructor */ + NEPermuteKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPermuteKernel(const NEPermuteKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPermuteKernel &operator=(const NEPermuteKernel &) = delete; + /** Allow instances of this class to be moved */ + NEPermuteKernel(NEPermuteKernel &&) = default; + /** Allow instances of this class to be moved */ + NEPermuteKernel &operator=(NEPermuteKernel &&) = default; + /** Default destructor */ + ~NEPermuteKernel() = default; + + /** Set the input and output of the kernel. + * + * @note Arbitrary permutation vectors are supported with rank not greater than 4 + * + * @param[in] input The input tensor to permute. Data types supported: All + * @param[out] output The output tensor. Data types supported: Same as @p input + * @param[in] perm Permutation vector + */ + void configure(const ITensor *input, ITensor *output, const PermutationVector &perm); + /** Static function to check if given info will lead to a valid configuration of @ref NEPermuteKernel + * + * @note Arbitrary permutation vectors are supported with rank not greater than 4 + * + * @param[in] input The input tensor info to permute. Data types supported: All + * @param[in] output The output tensor info. Data types supported: Same as @p input + * @param[in] perm Permutation vector + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Template function to run the permute + * + * @param[in] window Region on which to execute the kernel.
(Must be a valid region of the window returned by window()). + */ + template + void run_permute(const Window &window); + + /** Common signature for all the specialised permute functions + * + * @param[in] window Region on which to execute the kernel. + */ + using PermuteFunctionPtr = void (NEPermuteKernel::*)(const Window &window); + + PermuteFunctionPtr _func; + const ITensor *_input; + ITensor *_output; + PermutationVector _perm; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEPERMUTEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp index 0847cb1f23..8d17651f37 100644 --- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp +++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" +#include "src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" #include "arm_compute/core/TensorInfo.h" #include "src/core/CPP/Validate.h" diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h new file mode 100644 index 0000000000..d414168b2d --- /dev/null +++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2016-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H +#define ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform pixel-wise multiplication between two tensors */ +class NEPixelWiseMultiplicationKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEPixelWiseMultiplicationKernel"; + } + /** Default constructor */ + NEPixelWiseMultiplicationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPixelWiseMultiplicationKernel(const NEPixelWiseMultiplicationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPixelWiseMultiplicationKernel &operator=(const NEPixelWiseMultiplicationKernel &) = delete; + /** Allow instances of this class to be moved */ + NEPixelWiseMultiplicationKernel(NEPixelWiseMultiplicationKernel &&) = default; + /** Allow instances of this class to be moved */ + NEPixelWiseMultiplicationKernel &operator=(NEPixelWiseMultiplicationKernel &&) = default; + /** Default destructor */ + ~NEPixelWiseMultiplicationKernel() = default; + /** Initialise the kernel's input, output and border mode. + * + * Valid configurations (Input1,Input2) -> Output : + * + * Support: Broadcast? Scale=1/255? + * - (U8,U8) -> U8, S16 N Y + * - (U8,S16) -> S16 N Y + * - (S16,U8) -> S16 N Y + * - (S16,S16) -> S16 N Y + * - (S32,S32) -> S32 Y N + * - (F16,F16) -> F16 N Y + * - (F32,F32) -> F32 Y Y + * - (QASYMM8,QASYMM8) -> QASYMM8 Y Y + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED Y Y + * - (QSYMM16,QSYMM16) -> QSYMM16, S32 N Y + * + * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported. + * For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
+ * + * @param[in] input1 First input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 + * @param[in] input2 Second input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 + * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 + * @param[in] scale Scale to apply after multiplication. + * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. + * If both @p input1, @p input2 and @p output are of datatype S32, scale cannot be 1/255 + * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if any of the inputs is of quantized datatype + * @param[in] rounding_policy Rounding policy. + */ + void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); + /** Static function to check if given info will lead to a valid configuration of @ref NEPixelWiseMultiplicationKernel + * + * Valid configurations (Input1,Input2) -> Output : + * Support: Broadcast? Scale=1/255? + * - (U8,U8) -> U8, S16 N Y + * - (U8,S16) -> S16 N Y + * - (S16,U8) -> S16 N Y + * - (S16,S16) -> S16 N Y + * - (S32,S32) -> S32 Y N + * - (F16,F16) -> F16 N Y + * - (F32,F32) -> F32 Y Y + * - (QASYMM8,QASYMM8) -> QASYMM8 Y Y + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED Y Y + * - (QSYMM16,QSYMM16) -> QSYMM16, S32 N Y + * + * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported. + * For all other scale values only round to zero (implemented as round towards minus infinity) is supported. + * + * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 + * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 + * @param[in] output Output tensor info. 
Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 + * @param[in] scale Scale to apply after multiplication. + * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. + * If both @p input1, @p input2 and @p output are of datatype S32, scale cannot be 1/255 + * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if any of the inputs is of quantized datatype + * @param[in] rounding_policy Rounding policy. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); + + // Inherited methods overridden + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialised multiplication functions with integer scaling factor + * + * @param[in] in1 Input1 tensor object. + * @param[in] in2 Input2 tensor object. + * @param[out] out Output tensor object. + * @param[in] window Region on which to execute the kernel + * @param[in] scale Integer scale factor. + */ + using MulFunctionInt = void(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, int scale); + /** Common signature for all the specialised multiplication functions with float scaling factor + * + * @param[in] in1 Input1 tensor object. + * @param[in] in2 Input2 tensor object. + * @param[out] out Output tensor object. + * @param[in] window Region on which to execute the kernel + * @param[in] scale Float scale factor. + */ + using MulFunctionFloat = void(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, float scale); + /** Common signature for all the specialised QASYMM8 multiplication functions with float scaling factor + * + * @param[in] in1 Input1 tensor object. + * @param[in] in2 Input2 tensor object. + * @param[out] out Output tensor object. 
+ * @param[in] window Region on which to execute the kernel + * @param[in] scale Float scale factor. + * + */ + using MulFunctionQuantized = void(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, float scale); + + MulFunctionFloat *_func_float; + MulFunctionInt *_func_int; + MulFunctionQuantized *_func_quantized; + +private: + float _scale; + int _scale_exponent; +}; + +/** Interface for the complex pixelwise multiplication kernel. */ +class NEComplexPixelWiseMultiplicationKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEComplexPixelWiseMultiplicationKernel"; + } + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input1 An input tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor). + * @param[in] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * @param[out] output The output tensor, Data types supported: same as @p input1. Number of channels supported: same as @p input1. + */ + void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEComplexPixelWiseMultiplicationKernel + * + * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor). + * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; +}; + +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp index f9636dcb8d..0f0b9eed5a 100644 --- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h" +#include "src/core/NEON/kernels/NEPoolingLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.h b/src/core/NEON/kernels/NEPoolingLayerKernel.h new file mode 100644 index 0000000000..aa3d2f3f01 --- /dev/null +++ b/src/core/NEON/kernels/NEPoolingLayerKernel.h @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H +#define ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the pooling layer kernel */ +class NEPoolingLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEPoolingLayerKernel"; + } + /** Default constructor */ + NEPoolingLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPoolingLayerKernel(const NEPoolingLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPoolingLayerKernel &operator=(const NEPoolingLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEPoolingLayerKernel(NEPoolingLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEPoolingLayerKernel &operator=(NEPoolingLayerKernel &&) = default; + /** Default destructor */ + ~NEPoolingLayerKernel() = default; + /** Set the input and output tensors. + * + * @note F16 are supported for pool sizes 2 and 3 only + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + * @param[out] indices (optional) The indices of the maximal values. 
Data type supported: U32. + */ + void configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info, ITensor *indices = nullptr); + /** Static function to check if given info will lead to a valid configuration of @ref NEPoolingLayerKernel + * + * @note F16 are supported for pool sizes 2 and 3 only + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + /** Function to perform 2x2 pooling. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + void pooling2_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Function to perform 2x2 pooling and compute the pooling indices. The indices can be used for max unpool. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + void pooling2_f32_nhwc_maxpool_indices(const Window &window_input, const Window &window); + /** Function to perform MxN pooling for 32-bit floating point values. 
+ * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + void poolingMxN_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Function to perform MxN pooling for 32-bit floating point values (NHWC). + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + void poolingMxN_f32_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Function to perform 7x7 pooling. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + void pooling7_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Function to perform 3x3 pooling. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + void pooling3_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Function to perform 2x2 pooling for float16_t. 
+ * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + void pooling2_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Function to perform 2x2 pooling and compute the pooling indices for FP32/FP16. The indices can be used for max unpool. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + template + void pooling2_nchw_maxpool_indices(const Window &window_input, const Window &window); + /** Function to perform 2x2 pooling and compute the pooling indices. The indices can be used for max unpool. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + void pooling2_f16_nhwc_maxpool_indices(const Window &window_input, const Window &window); + /** Function to perform 3x3 pooling. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + void pooling3_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Function to perform MxN pooling for 16-bit floating point values. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. 
+ */ + void poolingMxN_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Function to perform MxN pooling for 16-bit floating point values. (NHWC) + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + void poolingMxN_f16_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Template function to perform 2x2 pooling for 8bit quantized fixed point. (NCHW) + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + template + void pooling2_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Template function to perform 3x3 pooling for 8bit quantized fixed point. (NCHW) + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + template + void pooling3_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Template function to perform MxN pooling for 8-bit quantized. (NCHW) + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. 
+ * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + template + void poolingMxN_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Template function to perform MxN pooling for 8-bit quantized. (NHWC) + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + template + void poolingMxN_q8_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Common signature for all the specialised Pooling functions + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + using PoolingFunction = void (NEPoolingLayerKernel::*)(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding); + +private: + PoolingFunction _func; + const ITensor *_input; + ITensor *_output; + ITensor *_indices; + PoolingLayerInfo _pool_info; + DataLayout _data_layout; + unsigned int _num_elems_processed_per_iteration; + BorderSize _border_size; + bool _is_square; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp b/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp index 06a1f14e5f..6757affae8 100644 --- a/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h" +#include "src/core/NEON/kernels/NEPriorBoxLayerKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEPriorBoxLayerKernel.h b/src/core/NEON/kernels/NEPriorBoxLayerKernel.h new file mode 100644 index 0000000000..430a47f9f8 --- /dev/null +++ b/src/core/NEON/kernels/NEPriorBoxLayerKernel.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H +#define ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to calculate prior boxes */ +class NEPriorBoxLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEPriorBoxLayerKernel"; + } + /** Default constructor */ + NEPriorBoxLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPriorBoxLayerKernel(const NEPriorBoxLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPriorBoxLayerKernel &operator=(const NEPriorBoxLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEPriorBoxLayerKernel(NEPriorBoxLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEPriorBoxLayerKernel &operator=(NEPriorBoxLayerKernel &&) = default; + /** Default destructor */ + ~NEPriorBoxLayerKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC. + * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1 + * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input1 + * @param[in] info Prior box layer info. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output, const PriorBoxLayerInfo &info); + /** Static function to check if given info will lead to a valid configuration of @ref NEPriorBoxLayerKernel + * + * @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC. + * @param[in] input2 Second source tensor info. Data types and layouts supported: same as @p input1 + * @param[in] output Destination tensor info.
Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input1 + * @param[in] info Prior box layer info. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Stores the coordinates of the calculated prior boxes. + * + * @param[out] out Output pointer. + * @param[in] offset Output offset to write to. + * @param[in] center_x Center pixel value on x-axis. + * @param[in] center_y Center pixel value on y-axis. + * @param[in] box_width Prior box width. + * @param[in] box_height Prior box height. + * @param[in] width Input width. + * @param[in] height Input height. + */ + void store_coordinates(float *out, const int offset, const float center_x, const float center_y, const float box_width, const float box_height, const int width, const int height); + /** Function to calculate prior boxes. + * + * @param[in] window Input region on which to execute the kernel. + */ + void calculate_prior_boxes(const Window &window); + + const ITensor *_input1; + const ITensor *_input2; + ITensor *_output; + PriorBoxLayerInfo _info; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp b/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp index 55585b4e00..8c1c8cf56b 100644 --- a/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h" +#include "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" diff --git a/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h b/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h new file mode 100644 index 0000000000..ba68171a59 --- /dev/null +++ b/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H +#define ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H + +#include "src/core/NEON/INEKernel.h" +#include + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform layer normalization */ +class NEQLSTMLayerNormalizationKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEQLSTMLayerNormalizationKernel"; + } + /** Default constructor */ + NEQLSTMLayerNormalizationKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEQLSTMLayerNormalizationKernel(const NEQLSTMLayerNormalizationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEQLSTMLayerNormalizationKernel &operator=(const NEQLSTMLayerNormalizationKernel &) = delete; + /** Default Move Constructor. */ + NEQLSTMLayerNormalizationKernel(NEQLSTMLayerNormalizationKernel &&) = default; + /** Default move assignment operator */ + NEQLSTMLayerNormalizationKernel &operator=(NEQLSTMLayerNormalizationKernel &&) = default; + /** Default destructor */ + ~NEQLSTMLayerNormalizationKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QSYMM16. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] weight Weight tensor. Data types supported: Same as @p input. + * @param[in] bias Bias tensor. Data types supported: S32 + */ + void configure(const ITensor *input, ITensor *output, const ITensor *weight, const ITensor *bias); + /** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayerNormalizationKernel + * + * @param[in] input Source tensor info. Data types supported: QSYMM16. + * @param[in] output Destination tensor info. Data types supported: Same as @p input. + * @param[in] weight Weight tensor info. Data types supported: Same as @p input. 
+ * @param[in] bias Bias tensor info. Data types supported: S32 + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias); + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + // constants + static constexpr uint32_t max_input_dimension{ 2 }; /**< The maximum input dimension supported */ + static constexpr uint32_t max_weight_dimension{ 1 }; /**< The maximum weight dimension supported */ + static constexpr uint32_t max_bias_dimension{ 1 }; /**< The maximum bias dimension supported */ + static constexpr uint32_t vector_size_byte{ 16 }; /**< Computation vector size in byte */ + + using ComputeFuncType = std::function; + + ComputeFuncType _fn{}; /**< Function pointer to computation function */ + + const ITensor *_input + { + nullptr + }; /**< Input tensor */ + const ITensor *_weight + { + nullptr + }; /**< Weight tensor */ + const ITensor *_bias + { + nullptr + }; /**< Bias tensor */ + ITensor *_output{ nullptr }; /**< Output tensor */ + + int32_t _output_multiplier{}; /**< Multiplier for output values */ + int32_t _output_shift{}; /**< Shift value for output values */ + + int32_t _window_start_x{}; /**< The beginning of x-axis iteration */ + int32_t _window_end_x{}; /**< The end of x-axis iteration */ + int32_t _window_step_x{}; /**< The size of x-axis iteration's step */ + + Window _inout_window{}; /**< Window for input and output tensor */ + Window _weight_window{}; /**< Window for weight and bias tensor */ + + /** Function to configure initial windows for destination of computation + * + * @param[in] target Destination tensor to use for output window + * + * @return configured window + */ + Window configure_window(ITensor *target); + // Function to compute for data type QSYMM16 + void compute_qsymm16(); + /** Function to compute summation and summation of squared input of the given input pointer + * +
* @param[in] input_ptr Pointer to input array + * + */ + std::pair sum_qsymm16(const int16_t *input_ptr); + /** Function to normalize values using computed mean and standard deviation + * + * @param[in] input_ptr Pointer to input array + * @param[out] output_ptr Pointer to output array + * @param[in] weight_ptr Pointer to weight array + * @param[in] bias_ptr Pointer to bias array + * @param[in] mean Mean value + * @param[in] inv_std_mul Quantized multiplier for standard deviation + * @param[in] inv_std_shift Shift for standard deviation + * + */ + void normalize_qasymm16(const int16_t *input_ptr, + int16_t *output_ptr, + const int16_t *weight_ptr, + const int32_t *bias_ptr, + int32_t mean, int32_t inv_std_mul, int32_t inv_std_shift); + /** Function to compute output quantization information */ + QuantizationInfo compute_output_qinfo(); +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp index 990e4b67bc..ff3d9fff96 100644 --- a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h" +#include "src/core/NEON/kernels/NEQuantizationLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEQuantizationLayerKernel.h b/src/core/NEON/kernels/NEQuantizationLayerKernel.h new file mode 100644 index 0000000000..5ee0ed4412 --- /dev/null +++ b/src/core/NEON/kernels/NEQuantizationLayerKernel.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2017-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H +#define ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the quantization layer kernel. 
+ * + * @note The implementation supports only 3D input tensors + * + */ +class NEQuantizationLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEQuantizationLayerKernel"; + } + /** Default constructor */ + NEQuantizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEQuantizationLayerKernel(const NEQuantizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEQuantizationLayerKernel &operator=(const NEQuantizationLayerKernel &) = delete; + /** Default Move Constructor. */ + NEQuantizationLayerKernel(NEQuantizationLayerKernel &&) = default; + /** Default move assignment operator */ + NEQuantizationLayerKernel &operator=(NEQuantizationLayerKernel &&) = default; + /** Default destructor */ + ~NEQuantizationLayerKernel() = default; + /** Set the input, output. + * + * @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. + * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. + * + * @note Output auto initialization is not supported by this kernel + */ + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayerKernel + * + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. + * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialised @ref NEQuantizationLayerKernel functions + * + * @param[in] window Region on which to execute the kernel. + */ + using QuantizationFunctionExecutorPtr = void (NEQuantizationLayerKernel::*)(const Window &window); + /** Function to apply QASYMM8 or QASYMM8_SIGNED quantization on a tensor. + * + * @param[in] window Region on which to execute the kernel. + */ + template + void run_quantize_qasymm8(const Window &window); + /** Function to apply QASYMM16 quantization on a tensor. + * + * @param[in] window Region on which to execute the kernel. + */ + template + void run_quantize_qasymm16(const Window &window); + + const ITensor *_input; + ITensor *_output; + + QuantizationFunctionExecutorPtr _func; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp b/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp index 79f7888eba..c48cda8b8e 100644 --- a/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp +++ b/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h" +#include "src/core/NEON/kernels/NEROIAlignLayerKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" diff --git a/src/core/NEON/kernels/NEROIAlignLayerKernel.h b/src/core/NEON/kernels/NEROIAlignLayerKernel.h new file mode 100644 index 0000000000..d909fb1758 --- /dev/null +++ b/src/core/NEON/kernels/NEROIAlignLayerKernel.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H +#define ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the RoIAlign kernel. + */ +class NEROIAlignLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEROIAlignLayerKernel"; + } + + /** Constructor */ + NEROIAlignLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEROIAlignLayerKernel(const NEROIAlignLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEROIAlignLayerKernel &operator=(const NEROIAlignLayerKernel &) = delete; + /** Default Move Constructor. 
*/ + NEROIAlignLayerKernel(NEROIAlignLayerKernel &&) = default; + /** Default move assignment operator. */ + NEROIAlignLayerKernel &operator=(NEROIAlignLayerKernel &&) = default; + /** Default destructor */ + ~NEROIAlignLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32. + * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner + * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. + * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8, otherwise same as @p input + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. + */ + void configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info); + /** Static function to check if given info will lead to a valid configuration of @ref NEROIAlignLayerKernel + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32. + * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8, + * otherwise same as @p input + * @param[in] output Destination tensor info. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. 
+ * + * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. + * + * @return a Status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + template + void internal_run(const Window &window, const ThreadInfo &info); + + const ITensor *_input; + ITensor *_output; + const ITensor *_rois; + ROIPoolingLayerInfo _pool_info; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H*/ diff --git a/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp index a3171d9aa6..40dae828a3 100644 --- a/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h" +#include "src/core/NEON/kernels/NEROIPoolingLayerKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" diff --git a/src/core/NEON/kernels/NEROIPoolingLayerKernel.h b/src/core/NEON/kernels/NEROIPoolingLayerKernel.h new file mode 100644 index 0000000000..36424172a6 --- /dev/null +++ b/src/core/NEON/kernels/NEROIPoolingLayerKernel.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H +#define ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +#include "arm_compute/core/IArray.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the ROI pooling layer kernel */ +class NEROIPoolingLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEROIPoolingLayerKernel"; + } + /** Default constructor */ + NEROIPoolingLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEROIPoolingLayerKernel(const NEROIPoolingLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEROIPoolingLayerKernel &operator=(const NEROIPoolingLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEROIPoolingLayerKernel(NEROIPoolingLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEROIPoolingLayerKernel &operator=(NEROIPoolingLayerKernel &&) = default; + /** Default destructor */ + ~NEROIPoolingLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F32. + * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner + * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16 + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as that specified by @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. 
+ * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois tensor. + */ + void configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + const ITensor *_rois; + ITensor *_output; + ROIPoolingLayerInfo _pool_info; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NERangeKernel.cpp b/src/core/NEON/kernels/NERangeKernel.cpp index 3466794b11..8d11122ab2 100644 --- a/src/core/NEON/kernels/NERangeKernel.cpp +++ b/src/core/NEON/kernels/NERangeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NERangeKernel.h" +#include "src/core/NEON/kernels/NERangeKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NERangeKernel.h b/src/core/NEON/kernels/NERangeKernel.h new file mode 100644 index 0000000000..7c42ef11dc --- /dev/null +++ b/src/core/NEON/kernels/NERangeKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NERANGEKERNEL_H +#define ARM_COMPUTE_NERANGEKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Kernel class for Range + * + * range generates a 1-D tensor containing a sequence of numbers that begins at 'start' and extends by increments + * of 'step' up to but not including 'end'. + */ +class NERangeKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NERangeKernel"; + } + /** Default constructor */ + NERangeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NERangeKernel(const NERangeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NERangeKernel &operator=(const NERangeKernel &) = delete; + /** Allow instances of this class to be moved */ + NERangeKernel(NERangeKernel &&) = default; + /** Allow instances of this class to be moved */ + NERangeKernel &operator=(NERangeKernel &&) = default; + /** Default destructor */ + ~NERangeKernel() = default; + /** Initialize the kernel's output tensor, start, end and step of the sequence. + * + * @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[in] start The starting value of the sequence. + * @param[in] end The ending (not including) value of the sequence. + * @param[in] step The gap between each pair of values in the sequence. 
+ */ + void configure(ITensor *output, float start, float end, float step); + /** Static function to check if given info will lead to a valid configuration of @ref NERangeKernel + * + * @param[in] output Output tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[in] start The starting value of the sequence. + * @param[in] end The ending (not including) value of the sequence. + * @param[in] step The gap between each pair of values in the sequence. + * + * @return a status + */ + static Status validate(const ITensorInfo *output, float start, float end, float step); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + using RangeFunction = void(ITensor *output, float start, float step, const Window &window); + + RangeFunction *_func; /**< Range function to be called */ + float _start; /**< Start of sequence */ + float _end; /**< End of sequence */ + float _step; /**< Increment/step value */ + ITensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NERANGEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp index 716b092396..4e63dd95aa 100644 --- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp +++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp @@ -21,18 +21,18 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h" +#include "src/core/NEON/kernels/NEReductionOperationKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/CPP/Validate.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/NEON/NEMath.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.h b/src/core/NEON/kernels/NEReductionOperationKernel.h new file mode 100644 index 0000000000..dfc105adae --- /dev/null +++ b/src/core/NEON/kernels/NEReductionOperationKernel.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H +#define ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a reduction operation + * + * @note For ARG_MIN/ARG_MAX reduction, the default data type for an uninitialized + * output tensor is signed 32-bit integer (S32). It is the user's responsibility + * to check that the results do not overflow because the indices are computed + * in unsigned 32-bit (U32). + */ +class NEReductionOperationKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEReductionOperationKernel"; + } + /** Default constructor */ + NEReductionOperationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReductionOperationKernel(const NEReductionOperationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReductionOperationKernel &operator=(const NEReductionOperationKernel &) = delete; + /** Allow instances of this class to be moved */ + NEReductionOperationKernel(NEReductionOperationKernel &&) = default; + /** Allow instances of this class to be moved */ + NEReductionOperationKernel &operator=(NEReductionOperationKernel &&) = default; + /** Default destructor */ + ~NEReductionOperationKernel() = default; + + /** Set the source, destination of the kernel + * + * @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. + * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input, S32 for ARG_MIN/ARG_MAX.
+ * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 + * @param[in] op Reduction operation to perform. + */ + void configure(const ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op); + + /** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperationKernel. + * + * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. + * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input, S32 for ARG_MIN/ARG_MAX. + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 + * @param[in] op Reduction operation to perform. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; + unsigned int _reduction_axis; + ReductionOperation _op; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NERemapKernel.cpp b/src/core/NEON/kernels/NERemapKernel.cpp index f698439507..b334a11227 100644 --- a/src/core/NEON/kernels/NERemapKernel.cpp +++ b/src/core/NEON/kernels/NERemapKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NERemapKernel.h" +#include "src/core/NEON/kernels/NERemapKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NERemapKernel.h b/src/core/NEON/kernels/NERemapKernel.h new file mode 100644 index 0000000000..8fe1ba5855 --- /dev/null +++ b/src/core/NEON/kernels/NERemapKernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEREMAPKERNEL_H +#define ARM_COMPUTE_NEREMAPKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a remap on a tensor */ +class NERemapKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NERemapKernel"; + } + /** Default constructor */ + NERemapKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NERemapKernel(const NERemapKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NERemapKernel &operator=(const NERemapKernel &) = delete; + /** Allow instances of this class to be moved */ + NERemapKernel(NERemapKernel &&) = default; + /** Allow instances of this class to be moved */ + NERemapKernel &operator=(NERemapKernel &&) = default; + /** Default destructor */ + ~NERemapKernel() = default; + + /** Initialize the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[in] map_x Map for X coordinates. Data type supported: F32. + * @param[in] map_y Map for Y coordinates. Data type supported: F32. + * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. + * @param[in] policy The interpolation type. 
+ */ + void configure(const ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + /** function to perform nearest interpolation on the given window */ + void remap_nearest(const Window &window); + /** function to perform bilinear interpolation on the given window */ + void remap_bilinear(const Window &window); + /** Remap function to use for the particular interpolation type passed to configure() */ + void (NERemapKernel::*_func)(const Window &window); + + const ITensor *_input; /**< Input image */ + ITensor *_output; /**< Output image */ + const ITensor *_map_x; /**< Input remap x coordinates */ + const ITensor *_map_y; /**< Input remap y coordinates */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEREMAPKERNEL_H */ diff --git a/src/core/NEON/kernels/NEReorgLayerKernel.cpp b/src/core/NEON/kernels/NEReorgLayerKernel.cpp index 1c48a5c93d..0dcb439665 100644 --- a/src/core/NEON/kernels/NEReorgLayerKernel.cpp +++ b/src/core/NEON/kernels/NEReorgLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEReorgLayerKernel.h" +#include "src/core/NEON/kernels/NEReorgLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEReorgLayerKernel.h b/src/core/NEON/kernels/NEReorgLayerKernel.h new file mode 100644 index 0000000000..eac91154a1 --- /dev/null +++ b/src/core/NEON/kernels/NEReorgLayerKernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEREORGLAYERKERNEL_H +#define ARM_COMPUTE_NEREORGLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the kernel to perform tensor re-organization */ +class NEReorgLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEReorgLayerKernel"; + } + /** Default constructor */ + NEReorgLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReorgLayerKernel(const NEReorgLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReorgLayerKernel &operator=(const NEReorgLayerKernel &) = delete; + /** Default Move Constructor. 
*/ + NEReorgLayerKernel(NEReorgLayerKernel &&) = default; + /** Default move assignment operator */ + NEReorgLayerKernel &operator=(NEReorgLayerKernel &&) = default; + /** Default destructor */ + ~NEReorgLayerKernel() = default; + /** Set the input and output of the kernel + * + * @param[in] input Source tensor. Data type supported: All + * @param[out] output Destination tensor. Data type supported: Same as @p input + * @param[in] stride Stride to be used during data re-organization. + * It defines the spatial distance between 2 consecutive pixels in the x and y direction + */ + void configure(const ITensor *input, ITensor *output, int32_t stride); + + /** Static function to check if given info will lead to a valid configuration of @ref NEReorgLayerKernel + * + * @param[in] input Source tensor info. Data type supported: All + * @param[in] output Destination tensor info. Data type supported: Same as @p input + * @param[in] stride Stride to be used during data re-organization + * It defines the spatial distance between 2 consecutive pixels in the x and y direction + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t stride); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; + int32_t _stride; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEREORGLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp index 7946812811..462404f996 100644 --- a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp +++ b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp @@ -21,18 +21,18 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" +#include "src/core/NEON/kernels/NEReshapeLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "src/core/AccessWindowStatic.h" #include "src/core/CPP/Validate.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEReshapeLayerKernel.h b/src/core/NEON/kernels/NEReshapeLayerKernel.h new file mode 100644 index 0000000000..ecec8d9f1f --- /dev/null +++ b/src/core/NEON/kernels/NEReshapeLayerKernel.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NERESHAPELAYERKERNEL_H +#define ARM_COMPUTE_NERESHAPELAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the kernel to perform tensor reshaping */ +class NEReshapeLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEReshapeLayerKernel"; + } + /** Default constructor */ + NEReshapeLayerKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReshapeLayerKernel(const NEReshapeLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReshapeLayerKernel &operator=(const NEReshapeLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEReshapeLayerKernel(NEReshapeLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEReshapeLayerKernel &operator=(NEReshapeLayerKernel &&) = default; + /** Default destructor */ + ~NEReshapeLayerKernel() = default; + /** Set the input and output info of the kernel + * + * @param[in] input Source tensor info. Data type supported: All + * @param[out] output Destination tensor info. Data type supported: Same as @p input + */ + void configure(const ITensorInfo *input, ITensorInfo *output); + + /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayerKernel + * + * @param[in] input Source tensor info. Data type supported: All + * @param[in] output Destination tensor info. 
Data type supported: Same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NERESHAPELAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEReverseKernel.cpp b/src/core/NEON/kernels/NEReverseKernel.cpp index 2c081cb917..21c758053a 100644 --- a/src/core/NEON/kernels/NEReverseKernel.cpp +++ b/src/core/NEON/kernels/NEReverseKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEReverseKernel.h" +#include "src/core/NEON/kernels/NEReverseKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" diff --git a/src/core/NEON/kernels/NEReverseKernel.h b/src/core/NEON/kernels/NEReverseKernel.h new file mode 100644 index 0000000000..07b547a327 --- /dev/null +++ b/src/core/NEON/kernels/NEReverseKernel.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEREVERSEKERNEL_H +#define ARM_COMPUTE_NEREVERSEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the reverse layer kernel. */ +class NEReverseKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEReverseKernel"; + } + /** Default constructor */ + NEReverseKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReverseKernel(const NEReverseKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReverseKernel &operator=(const NEReverseKernel &) = delete; + /** Allow instances of this class to be moved */ + NEReverseKernel(NEReverseKernel &&) = default; + /** Allow instances of this class to be moved */ + NEReverseKernel &operator=(NEReverseKernel &&) = default; + /** Default destructor */ + ~NEReverseKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor. Data type supported: Same as @p input + * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 + */ + void configure(const ITensor *input, ITensor *output, const ITensor *axis); + + /** Static function to check if given info will lead to a valid configuration of @ref NEReverseKernel + * + * @param[in] input Input tensor info. 
Data types supported: All + * @param[in] output Output tensor info. Data type supported: Same as @p input + * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32 + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; + const ITensor *_axis; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEREVERSEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp index 2e40759050..5a6d49bf07 100644 --- a/src/core/NEON/kernels/NEScaleKernel.cpp +++ b/src/core/NEON/kernels/NEScaleKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEScaleKernel.h" +#include "src/core/NEON/kernels/NEScaleKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Window.h" diff --git a/src/core/NEON/kernels/NEScaleKernel.h b/src/core/NEON/kernels/NEScaleKernel.h new file mode 100644 index 0000000000..a3786db5b7 --- /dev/null +++ b/src/core/NEON/kernels/NEScaleKernel.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NESCALEKERNEL_H +#define ARM_COMPUTE_NESCALEKERNEL_H + +#include "arm_compute/core/KernelDescriptors.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform scaling on a tensor */ +class NEScaleKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEScaleKernel"; + } + /** Default constructor */ + NEScaleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEScaleKernel(const NEScaleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEScaleKernel &operator=(const NEScaleKernel &) = delete; + /** Allow instances of this class to be moved */ + NEScaleKernel(NEScaleKernel &&) = default; + /** Allow instances of this class to be moved */ + NEScaleKernel &operator=(NEScaleKernel &&) = default; + /** Default destructor */ + ~NEScaleKernel() = default; + + /** Initialise the kernel's inputs, output and interpolation policy + * + * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor + * @note Using @p policy Area only supports data layout NCHW and input data type U8. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. + * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32 + * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32 + * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. 
+ * @param[in] info @ref ScaleKernelInfo to use for configuration + */ + void configure(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets, ITensor *output, + const ScaleKernelInfo &info); + /** Static function to check if given info will lead to a valid configuration of @ref NEScaleKernel + * + * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor + * @note Using @p policy Area only supports data layout NCHW and input data type U8. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. + * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32 + * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32 + * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32. + * @param[in] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] info @ref ScaleKernelInfo to use for validation + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *dx, const ITensorInfo *dy, const ITensorInfo *offsets, ITensorInfo *output, + const ScaleKernelInfo &info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** function to perform scale using area interpolation on the given window + * + * @note Used only in case down-sampling. 
+ */ + void scale_area_nchw_u8(const Window &window); + + /** function to perform scale using bilinear interpolation on the given window */ + template + void scale_bilinear_nchw(const Window &window); + /** function to perform scale using bilinear interpolation on the given window */ + template + void scale_bilinear_nhwc(const Window &window); + /** function to perform scale using bilinear interpolation on the given window */ + template + void scale_bilinear_qasymm(const Window &window); + + /** function to perform scale using nearest neighbour on the given window */ + template + void scale_nearest_nchw(const Window &window); + /** function to perform scale using nearest neighbour on the given window */ + template + void scale_nearest_nhwc(const Window &window); + + /** Pointer-to-member type of the scale functions of this kernel */ + using ScaleFunctionPtr = void (NEScaleKernel::*)(const Window &window); + + ScaleFunctionPtr _func; + const ITensor *_offsets; + const ITensor *_dx; + const ITensor *_dy; + const ITensor *_input; + ITensor *_output; + InterpolationPolicy _policy; + BorderMode _border_mode; + PixelValue _constant_border_value; + float _sampling_offset; + bool _align_corners; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NESCALEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEScharr3x3Kernel.cpp b/src/core/NEON/kernels/NEScharr3x3Kernel.cpp index eb1dc65c0f..58b8caa2b6 100644 --- a/src/core/NEON/kernels/NEScharr3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEScharr3x3Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h" +#include "src/core/NEON/kernels/NEScharr3x3Kernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NEScharr3x3Kernel.h b/src/core/NEON/kernels/NEScharr3x3Kernel.h new file mode 100644 index 0000000000..920410ebb3 --- /dev/null +++ b/src/core/NEON/kernels/NEScharr3x3Kernel.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NESCHARR3x3KERNEL_H +#define ARM_COMPUTE_NESCHARR3x3KERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to run a 3x3 Scharr filter on a tensor. 
+ * +* @f[ +* \mathbf{G}_x=\begin{vmatrix} +* -3 & 0 & +3\\ +* -10& 0 & +10\\ +* -3 & 0 & +3 +* \end{vmatrix} +* @f] +*/ +class NEScharr3x3Kernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEScharr3x3Kernel"; + } + /** Default constructor */ + NEScharr3x3Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEScharr3x3Kernel(const NEScharr3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEScharr3x3Kernel &operator=(const NEScharr3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + NEScharr3x3Kernel(NEScharr3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + NEScharr3x3Kernel &operator=(NEScharr3x3Kernel &&) = default; + /** Default destructor */ + ~NEScharr3x3Kernel() = default; + + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + bool _run_scharr_x; /**< Do we need to run Scharr X ? */ + bool _run_scharr_y; /**< Do we need to run Scharr Y ? 
*/ + const ITensor *_input; /**< Input tensor */ + ITensor *_output_x; /**< Output tensor for scharr X */ + ITensor *_output_y; /**< Output tensor for scharr Y */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NESCHARR3x3KERNEL_H */ diff --git a/src/core/NEON/kernels/NESelectKernel.cpp b/src/core/NEON/kernels/NESelectKernel.cpp index 2f36db2ddb..9cf9b98a0c 100644 --- a/src/core/NEON/kernels/NESelectKernel.cpp +++ b/src/core/NEON/kernels/NESelectKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NESelectKernel.h" +#include "src/core/NEON/kernels/NESelectKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NESelectKernel.h b/src/core/NEON/kernels/NESelectKernel.h new file mode 100644 index 0000000000..f7142feff8 --- /dev/null +++ b/src/core/NEON/kernels/NESelectKernel.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NESELECTKERNEL_H +#define ARM_COMPUTE_NESELECTKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the select kernel + * + * Select is computed by: + * @f[ output(i) = condition(i) ? x(i) : y(i) @f] + * + */ +class NESelectKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NESelectKernel"; + } + /** Default constructor */ + NESelectKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESelectKernel(const NESelectKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESelectKernel &operator=(const NESelectKernel &) = delete; + /** Allow instances of this class to be moved */ + NESelectKernel(NESelectKernel &&) = default; + /** Allow instances of this class to be moved */ + NESelectKernel &operator=(NESelectKernel &&) = default; + /** Default destructor */ + ~NESelectKernel() = default; + + /** Initialise the kernel's inputs and output + * + * @param[in] c Condition input tensor. Data types supported: U8. + * @param[in] x First input tensor. Data types supported: All. + * @param[in] y Second input tensor. Data types supported: Same as @p x + * @param[out] output Output tensor. Data types supported: Same as @p x + */ + void configure(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output); + + /** Validate the argument passed to the kernel + * + * @param[in] c Condition input tensor. Data types supported: U8. + * @param[in] x First input tensor.
Data types supported: All. + * @param[in] y Second input tensor. Data types supported: Same as @p x + * @param[in] output Output tensor. Data types supported: Same as @p x. + * + * @return a status + */ + static Status validate(const ITensorInfo *c, const ITensorInfo *x, const ITensorInfo *y, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialised select functions + * + * @param[in] c Condition input tensor. Data types supported: U8. + * @param[in] x First input tensor. Data types supported: All. + * @param[in] y Second input tensor. Data types supported: Same as @p x + * @param[in] output Output tensor. Data types supported: Same as @p x. + */ + using SelectFunction = void(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window); + + /** Select function to use for the particular tensor types passed to configure() */ + SelectFunction *_function; + const ITensor *_c; /**< Condition tensor */ + const ITensor *_x; /**< Source tensor 1 */ + const ITensor *_y; /**< Source tensor 2 */ + ITensor *_output; /**< Destination tensor */ + bool _has_same_rank; /**< Flag that indicates if condition tensor and other inputs have the same rank */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NESELECTKERNEL_H */ diff --git a/src/core/NEON/kernels/NESobel3x3Kernel.cpp b/src/core/NEON/kernels/NESobel3x3Kernel.cpp index 1c7089b641..ecf6b59c29 100644 --- a/src/core/NEON/kernels/NESobel3x3Kernel.cpp +++ b/src/core/NEON/kernels/NESobel3x3Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NESobel3x3Kernel.h" +#include "src/core/NEON/kernels/NESobel3x3Kernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NESobel3x3Kernel.h b/src/core/NEON/kernels/NESobel3x3Kernel.h new file mode 100644 index 0000000000..2c3eaf5eb7 --- /dev/null +++ b/src/core/NEON/kernels/NESobel3x3Kernel.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NESOBEL3x3KERNEL_H +#define ARM_COMPUTE_NESOBEL3x3KERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to run a 3x3 Sobel X filter on a tensor. 
+ * + * @f[ + * \mathbf{G}_x=\begin{vmatrix} + * -1 & 0 & +1\\ + * -2 & 0 & +2\\ + * -1 & 0 & +1 + * \end{vmatrix} + * @f] +*/ +class NESobel3x3Kernel : public INEKernel +{ +public: + const char *name() const override + { + return "NESobel3x3Kernel"; + } + /** Default constructor */ + NESobel3x3Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel3x3Kernel(const NESobel3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel3x3Kernel &operator=(const NESobel3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + NESobel3x3Kernel(NESobel3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + NESobel3x3Kernel &operator=(NESobel3x3Kernel &&) = default; + /** Default destructor */ + ~NESobel3x3Kernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + bool _run_sobel_x; /**< Do we need to run Sobel X ? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y ? 
*/ + const ITensor *_input; /**< Input tensor */ + ITensor *_output_x; /**< Output tensor for sobel X */ + ITensor *_output_y; /**< Output tensor for sobel Y */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NESOBEL3x3KERNEL_H */ diff --git a/src/core/NEON/kernels/NESobel5x5Kernel.cpp b/src/core/NEON/kernels/NESobel5x5Kernel.cpp index 2421ea72ad..5a66b1f364 100644 --- a/src/core/NEON/kernels/NESobel5x5Kernel.cpp +++ b/src/core/NEON/kernels/NESobel5x5Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NESobel5x5Kernel.h" +#include "src/core/NEON/kernels/NESobel5x5Kernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NESobel5x5Kernel.h b/src/core/NEON/kernels/NESobel5x5Kernel.h new file mode 100644 index 0000000000..bd5eb29296 --- /dev/null +++ b/src/core/NEON/kernels/NESobel5x5Kernel.h @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NESOBEL5x5KERNEL_H +#define ARM_COMPUTE_NESOBEL5x5KERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor. + * + */ +class NESobel5x5HorKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NESobel5x5HorKernel"; + } + /** Default constructor */ + NESobel5x5HorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel5x5HorKernel(const NESobel5x5HorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel5x5HorKernel &operator=(const NESobel5x5HorKernel &) = delete; + /** Allow instances of this class to be moved */ + NESobel5x5HorKernel(NESobel5x5HorKernel &&) = default; + /** Allow instances of this class to be moved */ + NESobel5x5HorKernel &operator=(NESobel5x5HorKernel &&) = default; + /** Default destructor */ + ~NESobel5x5HorKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @note At least one of output_x or output_y must be set + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + const ITensor *_input; /**< Input tensor */ + ITensor *_output_x; /**< X output of horizontal pass */ + ITensor *_output_y; /**< Y output of horizontal pass */ + bool _run_sobel_x; /**< Do we need to run Sobel X? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y? */ + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel to run the vertical pass of 5x5 Sobel filter on a tensor. + * +*/ +class NESobel5x5VertKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NESobel5x5VertKernel"; + } + /** Default constructor */ + NESobel5x5VertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel5x5VertKernel(const NESobel5x5VertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel5x5VertKernel &operator=(const NESobel5x5VertKernel &) = delete; + /** Allow instances of this class to be moved */ + NESobel5x5VertKernel(NESobel5x5VertKernel &&) = default; + /** Allow instances of this class to be moved */ + NESobel5x5VertKernel &operator=(NESobel5x5VertKernel &&) = default; + /** Default destructor */ + ~NESobel5x5VertKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input_x Input for X (X output of hor pass). Data type supported: S16. + * @param[in] input_y Input for Y (Y output of hor pass). Data type supported: S16. + * @param[out] output_x Destination tensor for the X gradient. Data type supported: S16. + * @param[out] output_y Destination tensor for the Y gradient. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined.
False if it's replicate or constant. + */ + void configure(ITensor *input_x, ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + ITensor *_input_x; /**< X input (X output of the hor pass) */ + ITensor *_input_y; /**< Y input (Y output of the hor pass) */ + ITensor *_output_x; /**< X output of sobel */ + ITensor *_output_y; /**< Y output of sobel */ + bool _run_sobel_x; /**< Do we need to run sobel X? */ + bool _run_sobel_y; /**< Do we need to run sobel Y? */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NESOBEL5x5KERNEL_H */ diff --git a/src/core/NEON/kernels/NESobel7x7Kernel.cpp b/src/core/NEON/kernels/NESobel7x7Kernel.cpp index 779d67a044..835b333a10 100644 --- a/src/core/NEON/kernels/NESobel7x7Kernel.cpp +++ b/src/core/NEON/kernels/NESobel7x7Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NESobel7x7Kernel.h" +#include "src/core/NEON/kernels/NESobel7x7Kernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NESobel7x7Kernel.h b/src/core/NEON/kernels/NESobel7x7Kernel.h new file mode 100644 index 0000000000..c5a3899bab --- /dev/null +++ b/src/core/NEON/kernels/NESobel7x7Kernel.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NESOBEL7x7KERNEL_H +#define ARM_COMPUTE_NESOBEL7x7KERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor. 
+ * + */ +class NESobel7x7HorKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NESobel7x7HorKernel"; + } + /** Default constructor */ + NESobel7x7HorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel7x7HorKernel(const NESobel7x7HorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel7x7HorKernel &operator=(const NESobel7x7HorKernel &) = delete; + /** Allow instances of this class to be moved */ + NESobel7x7HorKernel(NESobel7x7HorKernel &&) = default; + /** Allow instances of this class to be moved */ + NESobel7x7HorKernel &operator=(NESobel7x7HorKernel &&) = default; + /** Default destructor */ + ~NESobel7x7HorKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S32. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S32. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + const ITensor *_input; /**< Input tensor */ + ITensor *_output_x; /**< X output of horizontal pass */ + ITensor *_output_y; /**< Y output of horizontal pass */ + bool _run_sobel_x; /**< Do we need to run Sobel X? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y? */ + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel to run the vertical pass of 7x7 Sobel filter on a tensor.
+ * +*/ +class NESobel7x7VertKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NESobel7x7VertKernel"; + } + /** Default constructor */ + NESobel7x7VertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel7x7VertKernel(const NESobel7x7VertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel7x7VertKernel &operator=(const NESobel7x7VertKernel &) = delete; + /** Allow instances of this class to be moved */ + NESobel7x7VertKernel(NESobel7x7VertKernel &&) = default; + /** Allow instances of this class to be moved */ + NESobel7x7VertKernel &operator=(NESobel7x7VertKernel &&) = default; + /** Default destructor */ + ~NESobel7x7VertKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @note At least one of output_x or output_y must be set + * @note If output_x is set then input_x must be set too + * @note If output_y is set then input_y must be set too + * + * @param[in] input_x (Optional) Input for X (X output of hor pass). Data type supported: S32. + * @param[in] input_y (Optional) Input for Y (Y output of hor pass). Data type supported: S32. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S32. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S32. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ITensor *input_x, const ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + const ITensor *_input_x; /**< X input (X output of the hor pass) */ + const ITensor *_input_y; /**< Y input (Y output of the hor pass) */ + ITensor *_output_x; /**< X output of sobel */ + ITensor *_output_y; /**< Y output of sobel */ + bool _run_sobel_x; /**< Do we need to run sobel X? */ + bool _run_sobel_y; /**< Do we need to run sobel Y? */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NESOBEL7x7KERNEL_H */ diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp index 13f0a54275..97797cefde 100644 --- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp +++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" +#include "src/core/NEON/kernels/NESoftmaxLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.h b/src/core/NEON/kernels/NESoftmaxLayerKernel.h new file mode 100644 index 0000000000..adc2e57258 --- /dev/null +++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.h @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H +#define ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for identifying the max value of 1D Logits */ +class NELogits1DMaxKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NELogits1DMaxKernel"; + } + /** Default constructor */ + NELogits1DMaxKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogits1DMaxKernel(const NELogits1DMaxKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogits1DMaxKernel &operator=(const NELogits1DMaxKernel &) = delete; + /** Allow instances of this class to be moved */ + NELogits1DMaxKernel(NELogits1DMaxKernel &&) = default; + /** Allow instances of this class to be moved */ + NELogits1DMaxKernel &operator=(NELogits1DMaxKernel &&) = default; + /** Default destructor */ + ~NELogits1DMaxKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor. Data types supported: same as @p input + */ + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DMaxKernel + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] output Destination tensor.
Data types supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + using Logits1DMaxFunction = void(const ITensor &in, ITensor &out, const Window &window); + +private: + Logits1DMaxFunction *_func; + BorderSize _border_size; +}; + +/** Interface for softmax (IS_LOG = false) or log-softmax (IS_LOG = true) computation with pre-computed max. */ +template <bool IS_LOG = false> +class NELogits1DSoftmaxKernel : public INEKernel +{ +public: + const char *name() const override + { + if(IS_LOG) + { + return "NELogits1DLogSoftmaxKernel"; + } + else + { + return "NELogits1DSoftmaxKernel"; + } + } + /** Default constructor */ + NELogits1DSoftmaxKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogits1DSoftmaxKernel(const NELogits1DSoftmaxKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogits1DSoftmaxKernel &operator=(const NELogits1DSoftmaxKernel &) = delete; + /** Allow instances of this class to be moved */ + NELogits1DSoftmaxKernel(NELogits1DSoftmaxKernel &&) = default; + /** Allow instances of this class to be moved */ + NELogits1DSoftmaxKernel &operator=(NELogits1DSoftmaxKernel &&) = default; + /** Default destructor */ + ~NELogits1DSoftmaxKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] max Max values tensor. Same shape as input with dimension 0 set to 1. + * Data types supported: same as @p input. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] beta A scaling factor for the exponent. + * + * @param tmp Auxiliary tensor. Must be type F32 and same shape as the input.
+ */ + void configure(const ITensor *input, const ITensor *max, ITensor *output, const float beta, ITensor *tmp); + /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DSoftmaxKernel + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] max Max values tensor info. Same shape as input with dimension 0 set to 1. + * Data types supported: same as @p input. + * @param[in] output Destination tensor info. Data types supported: same as @p input. + * @param[in] beta A scaling factor for the exponent. + * @param[in] tmp Tensor info of auxiliary. Must be type F32 and same shape as the input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *max, + const ITensorInfo *output, const float beta, const ITensorInfo *tmp); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + using LogitsSoftmaxFunction = void(const ITensor &in, const ITensor &max, void *const tmp, ITensor &out, const float beta, + const Window &window); + + LogitsSoftmaxFunction *_func; + const ITensor *_input; + const ITensor *_max; + ITensor *_output; + float _beta; + ITensor *_tmp; //Temporary. Used internally +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp b/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp index 3293466979..27b3154298 100644 --- a/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp +++ b/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h" +#include "src/core/NEON/kernels/NESpaceToBatchLayerKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NESpaceToBatchLayerKernel.h b/src/core/NEON/kernels/NESpaceToBatchLayerKernel.h new file mode 100644 index 0000000000..627724580b --- /dev/null +++ b/src/core/NEON/kernels/NESpaceToBatchLayerKernel.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H +#define ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declaration +class ITensor; + +/** Interface for the space to batch kernel */ +class NESpaceToBatchLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NESpaceToBatchLayerKernel"; + } + /** Default constructor */ + NESpaceToBatchLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESpaceToBatchLayerKernel(const NESpaceToBatchLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESpaceToBatchLayerKernel &operator=(const NESpaceToBatchLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NESpaceToBatchLayerKernel(NESpaceToBatchLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NESpaceToBatchLayerKernel &operator=(NESpaceToBatchLayerKernel &&) = default; + /** Default destructor */ + ~NESpaceToBatchLayerKernel() = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 + * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(const ITensor *input, const ITensor *block_shape, const ITensor *paddings, ITensor *output); + /** Initialise the kernel's input and output. (Static block shape and paddings) + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. 
+ * @param[in] padding_left The left padding of the output tensor. + * @param[in] padding_right The right padding of the output tensor. + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayerKernel + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 + * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 + * @param[in] output Tensor output. Data types supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayerKernel (Static block shape and paddings) + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. + * @param[in] padding_left The left padding of the output tensor. + * @param[in] padding_right The right padding of the output tensor. + * @param[in] output Tensor output. 
Data types supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; /**< Source tensor */ + const ITensor *_block_shape; /**< Block shape tensor */ + const ITensor *_paddings; /**< Paddings tensor */ + ITensor *_output; /**< Destination tensor */ + DataLayout _data_layout; /**< Data layout to be used at run-time */ + + Size2D _padding_left; + int _block_shape_x; + int _block_shape_y; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp b/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp index 7c9cc4996b..7687c50c40 100644 --- a/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp +++ b/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h" +#include "src/core/NEON/kernels/NESpaceToDepthLayerKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NESpaceToDepthLayerKernel.h b/src/core/NEON/kernels/NESpaceToDepthLayerKernel.h new file mode 100644 index 0000000000..953b68a401 --- /dev/null +++ b/src/core/NEON/kernels/NESpaceToDepthLayerKernel.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H +#define ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the space to depth kernel */ +class NESpaceToDepthLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NESpaceToDepthLayerKernel"; + } + /** Default constructor */ + NESpaceToDepthLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESpaceToDepthLayerKernel(const NESpaceToDepthLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESpaceToDepthLayerKernel &operator=(const NESpaceToDepthLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NESpaceToDepthLayerKernel(NESpaceToDepthLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NESpaceToDepthLayerKernel &operator=(NESpaceToDepthLayerKernel &&) = default; + /** Default destructor */ + ~NESpaceToDepthLayerKernel() = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[out] output Tensor output. Data types supported: same as @p input + * @param[in] block_shape Block shape value + */ + void configure(const ITensor *input, ITensor *output, int32_t block_shape); + /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToDepthLayerKernel + * + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. + * @param[in] output Tensor output info. 
Data types supported: same as @p input + * @param[in] block_shape Block shape value + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; /**< Source tensor */ + ITensor *_output; /**< Destination tensor */ + int32_t _block_shape; /**< Block shape */ + DataLayout _data_layout; /**< Data layout of the operation */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEStackLayerKernel.cpp b/src/core/NEON/kernels/NEStackLayerKernel.cpp index ad7f1b1300..55170a169a 100644 --- a/src/core/NEON/kernels/NEStackLayerKernel.cpp +++ b/src/core/NEON/kernels/NEStackLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEStackLayerKernel.h" +#include "src/core/NEON/kernels/NEStackLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEStackLayerKernel.h b/src/core/NEON/kernels/NEStackLayerKernel.h new file mode 100644 index 0000000000..9b0a039b88 --- /dev/null +++ b/src/core/NEON/kernels/NEStackLayerKernel.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef ARM_COMPUTE_NESTACKLAYERKERNEL_H +#define ARM_COMPUTE_NESTACKLAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to stack a rank-R tensor into one with rank-(R+1) along the axis dimension. */ +class NEStackLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEStackLayerKernel"; + } + /** Default constructor */ + NEStackLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEStackLayerKernel(const NEStackLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEStackLayerKernel &operator=(const NEStackLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEStackLayerKernel(NEStackLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEStackLayerKernel &operator=(NEStackLayerKernel &&) = default; + /** Default destructor */ + ~NEStackLayerKernel() = default; + /** Initialise the kernel's inputs and output + * + * @note Supported input tensor rank: up to 4 + * + * @param[in] input Input tensor. Data types supported: All + * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. + * @param[in] idx_input Index of the input tensor in the list of tensors to stack. + * All tensors in the list must have the same shape + * @param[in] num_tensors Number of tensors to stack + * @param[out] output Output tensor. Data types supported: Same as @p input. + * + */ + void configure(const ITensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEStackLayerKernel + * + * @note Supported input tensor rank: up to 4 + * + * @param[in] input Input tensor info.
Data types supported: All + * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. + * @param[in] idx_input Index of the input tensor in the list of tensors to stack + * All tensors in the list must have the same shape + * @param[in] num_tensors Number of tensors to stack + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output); + + // Inherited methods overridden + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; + unsigned int _axis; + unsigned int _idx_input; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NESTACKLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEStridedSliceKernel.cpp b/src/core/NEON/kernels/NEStridedSliceKernel.cpp index 13b2cb5a10..ac04a1076d 100644 --- a/src/core/NEON/kernels/NEStridedSliceKernel.cpp +++ b/src/core/NEON/kernels/NEStridedSliceKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h" +#include "src/core/NEON/kernels/NEStridedSliceKernel.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEStridedSliceKernel.h b/src/core/NEON/kernels/NEStridedSliceKernel.h new file mode 100644 index 0000000000..9ce517417d --- /dev/null +++ b/src/core/NEON/kernels/NEStridedSliceKernel.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H +#define ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +#include + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the kernel to perform tensor strided slicing */ +class NEStridedSliceKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEStridedSliceKernel"; + } + /** Default constructor */ + NEStridedSliceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEStridedSliceKernel(const NEStridedSliceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEStridedSliceKernel &operator=(const NEStridedSliceKernel &) = delete; + /** Allow instances of this class to be moved */ + NEStridedSliceKernel(NEStridedSliceKernel &&) = default; + /** Allow instances of this class to be moved */ + NEStridedSliceKernel &operator=(NEStridedSliceKernel &&) = default; + /** Default destructor */ + ~NEStridedSliceKernel() = default; + /** Configure kernel + * + * @note Supported tensor rank: up to 4 + * + * @param[in] input Source tensor info. Data type supported: All + * @param[out] output Destination tensor info. Data type supported: Same as @p input + * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. 
+ * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. + * A slice of size 1 starting from starts[i] in the dimension must be preserved. + */ + void configure(const ITensorInfo *input, ITensorInfo *output, + const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, + int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); + + /** Static function to check if given info will lead to a valid configuration of @ref NEStridedSliceKernel + * + * @note Supported tensor rank: up to 4 + * + * @param[in] input Source tensor info. Data type supported: All + * @param[in] output Destination tensor info. Data type supported: Same as @p input + * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. + * A slice of size 1 starting from starts[i] in the dimension must be preserved. 
+ */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, + int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + +private: + Coordinates _starts_abs; /**< Absolute start coordinates */ + Coordinates _final_strides; /**< Final strides */ + int32_t _shrink_mask; /**< Shrink axis mask */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H */ diff --git a/src/core/NEON/kernels/NETableLookupKernel.cpp b/src/core/NEON/kernels/NETableLookupKernel.cpp index d26a0eedb5..19ce7f0352 100644 --- a/src/core/NEON/kernels/NETableLookupKernel.cpp +++ b/src/core/NEON/kernels/NETableLookupKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h" +#include "src/core/NEON/kernels/NETableLookupKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NETableLookupKernel.h b/src/core/NEON/kernels/NETableLookupKernel.h new file mode 100644 index 0000000000..7937999b46 --- /dev/null +++ b/src/core/NEON/kernels/NETableLookupKernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NETABLELOOKUPKERNEL_H +#define ARM_COMPUTE_NETABLELOOKUPKERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; +class ILut; + +/** Interface for the kernel to perform table lookup calculations. 
*/ +class NETableLookupKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NETableLookupKernel"; + } + /** Default constructor */ + NETableLookupKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETableLookupKernel(const NETableLookupKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETableLookupKernel &operator=(const NETableLookupKernel &) = delete; + /** Allow instances of this class to be moved */ + NETableLookupKernel(NETableLookupKernel &&) = default; + /** Allow instances of this class to be moved */ + NETableLookupKernel &operator=(NETableLookupKernel &&) = default; + /** Default destructor */ + ~NETableLookupKernel() = default; + /** Initialise the kernel's input, lut and output. + * + * @param[in] input An input tensor. Data types supported: U8/S16. + * @param[in] lut The input LUT. + * @param[out] output The output tensor. Data types supported: same as @p input + */ + void configure(const ITensor *input, const ILut *lut, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Perform table lookup on a given window. + * + * @param window window Region on which to execute the kernel. + */ + template + void tableLookup(const Window &window); + /** Common signature for all the specialised lut functions + * + * @param[in] window Region on which to execute the kernel. 
+ */ + using TableLookupFunction = void (NETableLookupKernel::*)(const Window &window); + /** Sub function to use for the particular tensor types passed to configure() */ + TableLookupFunction _func; + const ILut *_lut; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NETABLELOOKUPKERNEL_H */ diff --git a/src/core/NEON/kernels/NEThresholdKernel.cpp b/src/core/NEON/kernels/NEThresholdKernel.cpp index aad440b120..183bb8db5c 100644 --- a/src/core/NEON/kernels/NEThresholdKernel.cpp +++ b/src/core/NEON/kernels/NEThresholdKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEThresholdKernel.h" +#include "src/core/NEON/kernels/NEThresholdKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEThresholdKernel.h b/src/core/NEON/kernels/NEThresholdKernel.h new file mode 100644 index 0000000000..6b3b3866b0 --- /dev/null +++ b/src/core/NEON/kernels/NEThresholdKernel.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NETHRESHOLDKERNEL_H +#define ARM_COMPUTE_NETHRESHOLDKERNEL_H + +#include "arm_compute/core/KernelDescriptors.h" +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the thresholding kernel */ +class NEThresholdKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEThresholdKernel"; + } + /** Default constructor */ + NEThresholdKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEThresholdKernel(const NEThresholdKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEThresholdKernel &operator=(const NEThresholdKernel &) = delete; + /** Allow instances of this class to be moved */ + NEThresholdKernel(NEThresholdKernel &&) = default; + /** Allow instances of this class to be moved */ + NEThresholdKernel &operator=(NEThresholdKernel &&) = default; + /** Default destructor */ + ~NEThresholdKernel() = default; + /** Initialise the kernel's input, output and threshold parameters. + * + * @param[in] input An input tensor. Data type supported: U8 + * @param[out] output The output tensor. Data type supported: U8. + * @param[in] info Threshold kernel descriptor + */ + void configure(const ITensor *input, ITensor *output, const ThresholdKernelInfo &info); + /** Static function to check if given info will lead to a valid configuration of @ref NEThresholdKernel + * + * @param[in] input Input tensor info. Data type supported: U8 + * @param[in] output Output tensor info. 
Data type supported: U8 + * @param[in] info Threshold kernel descriptor + * + * @return A status containing an error code in case of failure + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ThresholdKernelInfo &info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** run binary thresholding on the given window */ + void run_binary(const Window &window); + /** run range thresholding on the given window */ + void run_range(const Window &window); + + void (NEThresholdKernel::*_func)(const Window &window); + + const ITensor *_input; /**< Input */ + ITensor *_output; /**< Output */ + ThresholdKernelInfo _info; /**< Threshold descriptor */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NETHRESHOLDKERNEL_H */ diff --git a/src/core/NEON/kernels/NETileKernel.cpp b/src/core/NEON/kernels/NETileKernel.cpp index 99651c8b8a..94256dc12d 100644 --- a/src/core/NEON/kernels/NETileKernel.cpp +++ b/src/core/NEON/kernels/NETileKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NETileKernel.h" +#include "src/core/NEON/kernels/NETileKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NETileKernel.h b/src/core/NEON/kernels/NETileKernel.h new file mode 100644 index 0000000000..8dfea8bc2f --- /dev/null +++ b/src/core/NEON/kernels/NETileKernel.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NETILEKERNEL_H +#define ARM_COMPUTE_NETILEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a tile operation */ +class NETileKernel : public INEKernel +{ +public: + /** Default constructor */ + NETileKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NETileKernel(const NETileKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ + NETileKernel &operator=(const NETileKernel &) = delete; + /** Allow instances of this class to be moved */ + NETileKernel(NETileKernel &&) = default; + /** Allow instances of this class to be moved */ + NETileKernel &operator=(NETileKernel &&) = default; + /** Default destructor */ + ~NETileKernel() = default; + const char *name() const override + { + return "NETileKernel"; + } + /** Set the source, destination of the kernel + * + * @param[in] input Source tensor. Data type supported: All. + * @param[out] output Destination tensor. Same as @p input + * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. + */ + void configure(const ITensor *input, ITensor *output, const Multiples &multiples); + /** Static function to check if given info will lead to a valid configuration of @ref NETileKernel + * + * @param[in] input Source tensor info. Data type supported: All. + * @param[in] output Destination tensor info. Same as @p input + * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Multiples &multiples); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NETILEKERNEL_H */ diff --git a/src/core/NEON/kernels/NETransposeKernel.cpp b/src/core/NEON/kernels/NETransposeKernel.cpp index 6037810a44..134831be4c 100644 --- a/src/core/NEON/kernels/NETransposeKernel.cpp +++ b/src/core/NEON/kernels/NETransposeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NETransposeKernel.h" +#include "src/core/NEON/kernels/NETransposeKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NETransposeKernel.h b/src/core/NEON/kernels/NETransposeKernel.h new file mode 100644 index 0000000000..73d2098fb3 --- /dev/null +++ b/src/core/NEON/kernels/NETransposeKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NETRANSPOSEKERNEL_H +#define ARM_COMPUTE_NETRANSPOSEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel which transposes the elements of a matrix. 
+ * + * [width, height, batch] -> [height, width, batch] + * + */ +class NETransposeKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NETransposeKernel"; + } + /** Default constructor */ + NETransposeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETransposeKernel(const NETransposeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETransposeKernel &operator=(const NETransposeKernel &) = delete; + /** Allow instances of this class to be moved */ + NETransposeKernel(NETransposeKernel &&) = default; + /** Allow instances of this class to be moved */ + NETransposeKernel &operator=(NETransposeKernel &&) = default; + /** Default destructor */ + ~NETransposeKernel() = default; + + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor. Data type supported: Same as @p input + */ + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NETransposeKernel + * + * @param[in] input Input tensor. Data types supported: All + * @param[in] output Output tensor. Data type supported: Same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the transpose functions + * + * @param[in] input An input tensor. Data types supported: All + * @param[out] output The output tensor. Data type supported: same as @p input + * @param[in] window Region on which to execute the kernel. 
+ */ + using TransposeFunction = void(const ITensor *input, ITensor *output, const Window &window); + /** Transpose function to use for the particular tensor types passed to configure() */ + TransposeFunction *_func; + const ITensor *_input; + ITensor *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NETRANSPOSEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEUpsampleLayerKernel.cpp b/src/core/NEON/kernels/NEUpsampleLayerKernel.cpp index 129c83c695..cbdec50a42 100644 --- a/src/core/NEON/kernels/NEUpsampleLayerKernel.cpp +++ b/src/core/NEON/kernels/NEUpsampleLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h" +#include "src/core/NEON/kernels/NEUpsampleLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEUpsampleLayerKernel.h b/src/core/NEON/kernels/NEUpsampleLayerKernel.h new file mode 100644 index 0000000000..7ff797a9f8 --- /dev/null +++ b/src/core/NEON/kernels/NEUpsampleLayerKernel.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H +#define ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the Upsample layer kernel.*/ +class NEUpsampleLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEUpsampleLayerKernel"; + } + /** Default constructor */ + NEUpsampleLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEUpsampleLayerKernel(const NEUpsampleLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEUpsampleLayerKernel &operator=(const NEUpsampleLayerKernel &) = delete; + /** Default Move Constructor. */ + NEUpsampleLayerKernel(NEUpsampleLayerKernel &&) = default; + /** Default move assignment operator */ + NEUpsampleLayerKernel &operator=(NEUpsampleLayerKernel &&) = default; + /** Default destructor */ + ~NEUpsampleLayerKernel() = default; + /** Set the input output tensors. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] info Contains stride information described in @ref Size2D. + * @param[in] policy Defines the policy to fill the intermediate pixels. 
+ * + */ + void configure(const ITensor *input, ITensor *output, const Size2D &info, const InterpolationPolicy policy); + /** Static function to check if given info will lead to a valid configuration of @ref NEUpsampleLayerKernel + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] output Destination tensor info. Data types supported: same as @p input. + * @param[in] info Contains stride information described in @ref Size2D. + * @param[in] policy Defines the policy to fill the intermediate pixels. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info, const InterpolationPolicy policy); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Function to run upsample layer (NCHW) + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void upsample_nchw(const Window &window); + /** Function to run upsample layer (NHWC) + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void upsample_nhwc(const Window &window); + + using UpsampleFunctionPtr = void (NEUpsampleLayerKernel::*)(const Window &window); + +private: + UpsampleFunctionPtr _func; + const ITensor *_input; + ITensor *_output; + Size2D _info; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEWarpKernel.cpp b/src/core/NEON/kernels/NEWarpKernel.cpp index 891304f02c..1ae076153b 100644 --- a/src/core/NEON/kernels/NEWarpKernel.cpp +++ b/src/core/NEON/kernels/NEWarpKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEWarpKernel.h" +#include "src/core/NEON/kernels/NEWarpKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NEWarpKernel.h b/src/core/NEON/kernels/NEWarpKernel.h new file mode 100644 index 0000000000..2c4cb55e3c --- /dev/null +++ b/src/core/NEON/kernels/NEWarpKernel.h @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEWARPKERNEL_H +#define ARM_COMPUTE_NEWARPKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +#include +#include +namespace arm_compute +{ +class ITensor; + +/** Common interface for warp affine and warp perspective */ +class INEWarpKernel : public INEKernel +{ +public: + /** Default constructor */ + INEWarpKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INEWarpKernel(const INEWarpKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INEWarpKernel &operator=(const INEWarpKernel &) = delete; + /** Allow instances of this class to be moved */ + INEWarpKernel(INEWarpKernel &&) = default; + /** Allow instances of this class to be moved */ + INEWarpKernel &operator=(INEWarpKernel &&) = default; + /** Default destructor */ + ~INEWarpKernel() = default; + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] matrix The perspective or affine matrix to use. Must be 2x3 for affine and 3x3 for perspective of type float. + * The matrix argument requires 9 values, for the affine case the last 3 values are ignored. + * @param[in] border_mode Strategy to use for borders + * @param[in] constant_border_value Constant value used for filling the border. 
+ */ + virtual void configure(const ITensor *input, ITensor *output, const std::array &matrix, BorderMode border_mode, uint8_t constant_border_value); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + + // Inherited methods overridden: + BorderSize border_size() const override; + +protected: + /** function to perform warp affine or warp perspective on the given window when border mode == UNDEFINED + * + * @param[in] window Region on which to execute the kernel + */ + virtual void warp_undefined(const Window &window) = 0; + /** function to perform warp affine or warp perspective on the given window when border mode == CONSTANT + * + * @param[in] window Region on which to execute the kernel + */ + virtual void warp_constant(const Window &window) = 0; + /** function to perform warp affine or warp perspective on the given window when border mode == REPLICATE + * + * @param[in] window Region on which to execute the kernel + */ + virtual void warp_replicate(const Window &window) = 0; + /** Common signature for all the specialised warp functions + * + * @param[in] window Region on which to execute the kernel. + */ + void (INEWarpKernel::*_func)(const Window &window); + + const ITensor *_input; /**< Input Tensor */ + ITensor *_output; /**< Output Tensor */ + uint8_t _constant_border_value; /**< Constant value used for filling the border. This value is used for those pixels out of the ROI when the border mode is CONSTANT */ + std::array _matrix; /**< The affine or perspective matrix. Must be 2x3 for warp affine or 3x3 for warp perspective of type float. 
*/ +}; + +/** Template interface for the kernel to compute warp affine + * + */ +template +class NEWarpAffineKernel : public INEWarpKernel +{ +private: + const char *name() const override + { + return "NEWarpAffineKernel"; + } + // Inherited methods overridden: + void warp_undefined(const Window &window) override; + void warp_constant(const Window &window) override; + void warp_replicate(const Window &window) override; +}; + +/** Template interface for the kernel to compute warp perspective + * + */ +template +class NEWarpPerspectiveKernel : public INEWarpKernel +{ +private: + const char *name() const override + { + return "NEWarpPerspectiveKernel"; + } + // Inherited methods overridden: + void warp_undefined(const Window &window) override; + void warp_constant(const Window &window) override; + void warp_replicate(const Window &window) override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEWARPKERNEL_H */ diff --git a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp index c7fa2d2365..118655b755 100644 --- a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp +++ b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" +#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Validate.h" diff --git a/src/core/NEON/kernels/NEWeightsReshapeKernel.h b/src/core/NEON/kernels/NEWeightsReshapeKernel.h new file mode 100644 index 0000000000..9678b79fda --- /dev/null +++ b/src/core/NEON/kernels/NEWeightsReshapeKernel.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H +#define ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform reshaping on the weights used by convolution and locally connected layer + * + * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels. + * In combination with the @ref NEIm2ColKernel can transform a convolution to a matrix multiplication. 
+ * + * For example assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have: + * @f[ + * \left( \begin{array}{ccc} + * a000 & a001 & a002 \\ + * a010 & a011 & a012 \\ + * a020 & a021 & a022 \\ + * \end{array} \right) + * \left( \begin{array}{ccc} + * a100 & a101 & a102 \\ + * a110 & a111 & a112 \\ + * a120 & a121 & a122 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccc} + * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\ + * \end{array} \right) + * @f] + */ +class NEWeightsReshapeKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEWeightsReshapeKernel"; + } + /** Constructor.*/ + NEWeightsReshapeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEWeightsReshapeKernel(const NEWeightsReshapeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEWeightsReshapeKernel &operator=(const NEWeightsReshapeKernel &) = delete; + /** Allow instances of this class to be moved */ + NEWeightsReshapeKernel(NEWeightsReshapeKernel &&) = default; + /** Allow instances of this class to be moved */ + NEWeightsReshapeKernel &operator=(NEWeightsReshapeKernel &&) = default; + /** Default destructor */ + ~NEWeightsReshapeKernel() = default; + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, + * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. + * Data types supported: All + * @param[in] bias The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with + * dimensions [OFM, num_patches] if unshared. 
Data types supported: Same as @p input + * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. + * @param[out] output The output tensor. Data types supported: Same as @p input + */ + void configure(const ITensor *input, const ITensor *bias, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEWeightsReshapeKernel + * + * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, + * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. + * Data types supported: All + * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with + * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input + * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. + * @param[in] output The output tensor. Should be a 2D Tensor. Data types supported: Same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + const ITensor *_bias; + ITensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp index 90afbd6a19..b5afeed1f6 100644 --- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h new file mode 100644 index 0000000000..81b4cbed9e --- /dev/null +++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H +#define ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the width concatenate kernel. 
+ * The input tensor will be concatenated into the output tensor. + */ +class NEWidthConcatenateLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEWidthConcatenateLayerKernel"; + } + /** Default constructor */ + NEWidthConcatenateLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEWidthConcatenateLayerKernel(const NEWidthConcatenateLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEWidthConcatenateLayerKernel &operator=(const NEWidthConcatenateLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEWidthConcatenateLayerKernel(NEWidthConcatenateLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEWidthConcatenateLayerKernel &operator=(NEWidthConcatenateLayerKernel &&) = default; + /** Default destructor */ + ~NEWidthConcatenateLayerKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor info. Data types supported: All + * @param[in] width_offset The offset on the X axis. + * @param[in,out] output Output tensor info. Data types supported: Same as @p input. + */ + void configure(const ITensorInfo *input, unsigned int width_offset, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEWidthConcatenateLayerKernel + * + * @param[in] input Input tensor info. Data types supported: All + * @param[in] width_offset The offset on the X axis. + * @param[in] output Output tensor info. Data types supported: Same as @p input. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + +private: + unsigned int _width_offset; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h index bf5d77fc43..2b87e512dc 100644 --- a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h +++ b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h @@ -24,7 +24,7 @@ #ifndef ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H #define ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H -#include "arm_compute/core/NEON/INEKernel.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/NEON/kernels/convolution/common/convolution.hpp" #include "src/core/NEON/kernels/convolution/common/tensor.hpp" diff --git a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp index 48c0616b35..33bcc20d39 100644 --- a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp +++ b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h" +#include "src/core/NEON/kernels/NEYOLOLayerKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEYOLOLayerKernel.h b/src/core/NEON/kernels/NEYOLOLayerKernel.h new file mode 100644 index 0000000000..806cf9cc09 --- /dev/null +++ b/src/core/NEON/kernels/NEYOLOLayerKernel.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEYOLOLAYERKERNEL_H +#define ARM_COMPUTE_NEYOLOLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the YOLO layer kernel. 
*/ +class NEYOLOLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEYOLOLayerKernel"; + } + /** Constructor */ + NEYOLOLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEYOLOLayerKernel(const NEYOLOLayerKernel &) = delete; + /** Default move constructor */ + NEYOLOLayerKernel(NEYOLOLayerKernel &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEYOLOLayerKernel &operator=(const NEYOLOLayerKernel &) = delete; + /** Default move assignment operator */ + NEYOLOLayerKernel &operator=(NEYOLOLayerKernel &&) = default; + /** Default destructor */ + ~NEYOLOLayerKernel() = default; + /** Set the input and output tensor. + * + * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place + * + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] act_info Activation layer parameters. + * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) + */ + void configure(ITensor *input, ITensor *output, const ActivationLayerInfo &act_info, int32_t num_classes); + /** Static function to check if given info will lead to a valid configuration of @ref NEYOLOLayerKernel + * + * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result + * of the activation function. Data types supported: F16/F32. + * @param[in] output Destination tensor info. Data type supported: same as @p input + * @param[in] act_info Activation layer information. 
+ * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Function to run YOLO layer + * + * @param[in] window Region on which to execute the kernel. + */ + template + void yolo_layer_nchw(const Window &window); + /** Function to run YOLO layer on tensors with NHWC format + * + * @param[in] window Region on which to execute the kernel. + */ + template + void yolo_layer_nhwc(const Window &window); + /** Common signature for all the yolo layer functions + * + * @param[in] window Region on which to execute the kernel. + */ + using YOLOFunctionPtr = void (NEYOLOLayerKernel::*)(const Window &window); + +private: + YOLOFunctionPtr _func; + ITensor *_input; + ITensor *_output; + ActivationLayerInfo _act_info; + int32_t _num_classes; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEYOLOLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h b/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h index 030f1aad12..92c013260b 100644 --- a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h +++ b/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h @@ -24,7 +24,7 @@ #ifndef SRC_INEGEMMWRAPPERKERNEL_H #define SRC_INEGEMMWRAPPERKERNEL_H -#include "arm_compute/core/NEON/INEKernel.h" +#include "src/core/NEON/INEKernel.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h b/src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h index a2f7e3bd59..a956898403 100644 --- a/src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h +++ b/src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h @@ -24,9 
+24,9 @@ #ifndef SRC_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H #define SRC_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/NEON/kernels/convolution/depthwise/depthwise.hpp" diff --git a/src/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h b/src/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h index 4af82f89a8..7fcf2b1e4d 100644 --- a/src/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h +++ b/src/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h @@ -24,10 +24,10 @@ #ifndef ARM_COMPUTE_ASSEMBLY_GEMM_KERNEL_WRAPPER_KERNEL_H #define ARM_COMPUTE_ASSEMBLY_GEMM_KERNEL_WRAPPER_KERNEL_H -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_gemm_compute_iface.hpp" +#include "src/core/NEON/INEKernel.h" #include "gemm_common.hpp" diff --git a/src/core/TracePoint.cpp b/src/core/TracePoint.cpp index 06d9527486..d67faad868 100644 --- a/src/core/TracePoint.cpp +++ b/src/core/TracePoint.cpp @@ -33,12 +33,12 @@ #include "arm_compute/core/IPyramid.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Window.h" #include "arm_compute/runtime/FunctionDescriptors.h" #include "arm_compute/runtime/IWeightsManager.h" #include "arm_compute/runtime/MemoryGroup.h" +#include "src/core/NEON/kernels/assembly/arm_gemm.hpp" #include "utils/TypePrinter.h" #include diff --git a/src/graph/backends/NEON/NEFunctionFactory.cpp b/src/graph/backends/NEON/NEFunctionFactory.cpp index 95c6631830..ec06f3fa30 100644 --- a/src/graph/backends/NEON/NEFunctionFactory.cpp +++ b/src/graph/backends/NEON/NEFunctionFactory.cpp @@ -32,6 +32,7 @@ 
#include "arm_compute/graph/nodes/Nodes.h" #include "arm_compute/runtime/CPP/CPPFunctions.h" #include "arm_compute/runtime/NEON/NEFunctions.h" +#include "src/core/NEON/NEKernels.h" #include "support/Cast.h" #include "support/ToolchainSupport.h" diff --git a/src/graph/backends/NEON/NENodeValidator.cpp b/src/graph/backends/NEON/NENodeValidator.cpp index 63e8ff910f..a9e5a86249 100644 --- a/src/graph/backends/NEON/NENodeValidator.cpp +++ b/src/graph/backends/NEON/NENodeValidator.cpp @@ -28,6 +28,19 @@ #include "arm_compute/runtime/CPP/CPPFunctions.h" #include "arm_compute/runtime/NEON/NEFunctions.h" +#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" +#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h" +#include "src/core/NEON/kernels/NEReshapeLayerKernel.h" +#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h" #include "support/Cast.h" using namespace arm_compute::utils::cast; diff --git a/src/runtime/NEON/INEOperator.cpp b/src/runtime/NEON/INEOperator.cpp index 75068b15c9..a13b29b572 100644 --- a/src/runtime/NEON/INEOperator.cpp +++ b/src/runtime/NEON/INEOperator.cpp @@ -22,12 +22,16 @@ * SOFTWARE. 
*/ #include "arm_compute/runtime/NEON/INEOperator.h" +#include "arm_compute/core/Window.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/INEKernel.h" namespace arm_compute { namespace experimental { +INEOperator::~INEOperator() = default; + INEOperator::INEOperator(IRuntimeContext *ctx) : _kernel(), _ctx(ctx), _workspace() { diff --git a/src/runtime/NEON/INESimpleFunction.cpp b/src/runtime/NEON/INESimpleFunction.cpp index cef2762e37..5438bce62a 100644 --- a/src/runtime/NEON/INESimpleFunction.cpp +++ b/src/runtime/NEON/INESimpleFunction.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,9 +23,14 @@ */ #include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/core/CPP/ICPPKernel.h" +#include "arm_compute/core/Window.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" -using namespace arm_compute; +namespace arm_compute +{ +INESimpleFunction::~INESimpleFunction() = default; INESimpleFunction::INESimpleFunction() // NOLINT : _kernel(), @@ -35,6 +40,7 @@ INESimpleFunction::INESimpleFunction() // NOLINT void INESimpleFunction::run() { - NEScheduler::get().schedule(&_border_handler, Window::DimZ); + NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); NEScheduler::get().schedule(_kernel.get(), Window::DimY); } +} //namespace arm_compute diff --git a/src/runtime/NEON/INESimpleFunctionNoBorder.cpp b/src/runtime/NEON/INESimpleFunctionNoBorder.cpp index f2181e0a74..21dd58e378 100644 --- a/src/runtime/NEON/INESimpleFunctionNoBorder.cpp +++ b/src/runtime/NEON/INESimpleFunctionNoBorder.cpp @@ -23,11 +23,15 @@ */ #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/core/Window.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/INEKernel.h" #include "src/runtime/Utils.h" namespace arm_compute { 
+INESimpleFunctionNoBorder::~INESimpleFunctionNoBorder() = default; + INESimpleFunctionNoBorder::INESimpleFunctionNoBorder(IRuntimeContext *ctx) : _kernel(), _ctx(ctx) diff --git a/src/runtime/NEON/functions/NEAbsoluteDifference.cpp b/src/runtime/NEON/functions/NEAbsoluteDifference.cpp index ec27820126..df2bc7d72e 100644 --- a/src/runtime/NEON/functions/NEAbsoluteDifference.cpp +++ b/src/runtime/NEON/functions/NEAbsoluteDifference.cpp @@ -23,12 +23,14 @@ */ #include "arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h" -#include "arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h" +#include "src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; +namespace arm_compute +{ +NEAbsoluteDifference::~NEAbsoluteDifference() = default; void NEAbsoluteDifference::configure(const ITensor *input1, const ITensor *input2, ITensor *output) { @@ -36,3 +38,4 @@ void NEAbsoluteDifference::configure(const ITensor *input1, const ITensor *input k->configure(input1, input2, output); _kernel = std::move(k); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEAccumulate.cpp b/src/runtime/NEON/functions/NEAccumulate.cpp index 662f8ccb5b..20eefd9d2d 100644 --- a/src/runtime/NEON/functions/NEAccumulate.cpp +++ b/src/runtime/NEON/functions/NEAccumulate.cpp @@ -23,12 +23,14 @@ */ #include "arm_compute/runtime/NEON/functions/NEAccumulate.h" -#include "arm_compute/core/NEON/kernels/NEAccumulateKernel.h" +#include "src/core/NEON/kernels/NEAccumulateKernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; +namespace arm_compute +{ +NEAccumulate::~NEAccumulate() = default; void NEAccumulate::configure(const ITensor *input, ITensor *output) { @@ -37,6 +39,8 @@ void NEAccumulate::configure(const ITensor *input, ITensor *output) _kernel = std::move(k); } +NEAccumulateWeighted::~NEAccumulateWeighted() = default; + void NEAccumulateWeighted::configure(const ITensor *input, 
float alpha, ITensor *output, bool use_fp16) { if(use_fp16) @@ -53,9 +57,12 @@ void NEAccumulateWeighted::configure(const ITensor *input, float alpha, ITensor } } +NEAccumulateSquared::~NEAccumulateSquared() = default; + void NEAccumulateSquared::configure(const ITensor *input, uint32_t shift, ITensor *output) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, shift, output); _kernel = std::move(k); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEActivationLayer.cpp b/src/runtime/NEON/functions/NEActivationLayer.cpp index 7f55edbf70..f9ad298e4d 100644 --- a/src/runtime/NEON/functions/NEActivationLayer.cpp +++ b/src/runtime/NEON/functions/NEActivationLayer.cpp @@ -24,16 +24,18 @@ #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/core/Error.h" -#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" #include "arm_compute/core/experimental/Types.h" #include "arm_compute/runtime/IRuntimeContext.h" #include "arm_compute/runtime/Tensor.h" +#include "src/core/NEON/kernels/NEActivationLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute { namespace experimental { +NEActivationLayer::~NEActivationLayer() = default; + void NEActivationLayer::configure(const ITensorInfo *input, ITensorInfo *output, const ActivationLayerInfo &activation_info) { auto k = arm_compute::support::cpp14::make_unique(); diff --git a/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp b/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp index 70bbba62ad..2a9bb76c7f 100644 --- a/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp +++ b/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp @@ -29,11 +29,14 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/kernels/NEReductionOperationKernel.h" #include "support/MemorySupport.h" namespace arm_compute { +NEArgMinMaxLayer::~NEArgMinMaxLayer() = default; + 
NEArgMinMaxLayer::NEArgMinMaxLayer(std::shared_ptr memory_manager) : _reduction_function(support::cpp14::make_unique()) { diff --git a/src/runtime/NEON/functions/NEArithmeticAddition.cpp b/src/runtime/NEON/functions/NEArithmeticAddition.cpp index 4453a015e8..0bf9a09333 100644 --- a/src/runtime/NEON/functions/NEArithmeticAddition.cpp +++ b/src/runtime/NEON/functions/NEArithmeticAddition.cpp @@ -24,7 +24,7 @@ #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" +#include "src/core/NEON/kernels/NEArithmeticAdditionKernel.h" #include "support/MemorySupport.h" #include @@ -33,6 +33,8 @@ namespace arm_compute { namespace experimental { +NEArithmeticAddition::~NEArithmeticAddition() = default; + void NEArithmeticAddition::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info) { ARM_COMPUTE_UNUSED(act_info); diff --git a/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp b/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp index 1c95bbfae8..ba3f426269 100644 --- a/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp +++ b/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp @@ -24,7 +24,7 @@ #include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h" +#include "src/core/NEON/kernels/NEArithmeticSubtractionKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp b/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp index eab40ac5be..d0fdfcf101 100644 --- a/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp +++ b/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp @@ -29,10 +29,13 @@ #include "arm_compute/core/Types.h" #include 
"arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" #include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEBatchNormalizationLayer::~NEBatchNormalizationLayer() = default; NEBatchNormalizationLayer::NEBatchNormalizationLayer() : _norm_kernel() @@ -43,7 +46,8 @@ void NEBatchNormalizationLayer::configure(ITensor *input, ITensor *output, const ActivationLayerInfo act_info) { // Configure kernel - _norm_kernel.configure(input, output, mean, var, beta, gamma, epsilon, act_info); + _norm_kernel = arm_compute::support::cpp14::make_unique(); + _norm_kernel->configure(input, output, mean, var, beta, gamma, epsilon, act_info); } Status NEBatchNormalizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *var, const ITensorInfo *beta, const ITensorInfo *gamma, @@ -55,5 +59,6 @@ Status NEBatchNormalizationLayer::validate(const ITensorInfo *input, const ITens void NEBatchNormalizationLayer::run() { - NEScheduler::get().schedule(&_norm_kernel, Window::DimY); + NEScheduler::get().schedule(_norm_kernel.get(), Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp b/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp index 2705cffe68..77a63c0f63 100644 --- a/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp +++ b/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h" #include "support/MemorySupport.h" diff --git a/src/runtime/NEON/functions/NEBitwiseAnd.cpp b/src/runtime/NEON/functions/NEBitwiseAnd.cpp index 1d89308565..f3b5220ccf 100644 --- a/src/runtime/NEON/functions/NEBitwiseAnd.cpp +++ b/src/runtime/NEON/functions/NEBitwiseAnd.cpp @@ -23,7 
+23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEBitwiseAnd.h" -#include "arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h" +#include "src/core/NEON/kernels/NEBitwiseAndKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEBitwiseNot.cpp b/src/runtime/NEON/functions/NEBitwiseNot.cpp index 585b059005..036584ea1a 100644 --- a/src/runtime/NEON/functions/NEBitwiseNot.cpp +++ b/src/runtime/NEON/functions/NEBitwiseNot.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEBitwiseNot.h" -#include "arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h" +#include "src/core/NEON/kernels/NEBitwiseNotKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEBitwiseOr.cpp b/src/runtime/NEON/functions/NEBitwiseOr.cpp index bba866d97a..fc905a0919 100644 --- a/src/runtime/NEON/functions/NEBitwiseOr.cpp +++ b/src/runtime/NEON/functions/NEBitwiseOr.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEBitwiseOr.h" -#include "arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h" +#include "src/core/NEON/kernels/NEBitwiseOrKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEBitwiseXor.cpp b/src/runtime/NEON/functions/NEBitwiseXor.cpp index 188fe3d9ef..301a0c4659 100644 --- a/src/runtime/NEON/functions/NEBitwiseXor.cpp +++ b/src/runtime/NEON/functions/NEBitwiseXor.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEBitwiseXor.h" -#include "arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h" +#include "src/core/NEON/kernels/NEBitwiseXorKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEBoundingBoxTransform.cpp b/src/runtime/NEON/functions/NEBoundingBoxTransform.cpp index b1ecfaf314..0b639430b1 100644 --- a/src/runtime/NEON/functions/NEBoundingBoxTransform.cpp +++ b/src/runtime/NEON/functions/NEBoundingBoxTransform.cpp @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ #include "arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h" +#include "src/core/NEON/kernels/NEBoundingBoxTransformKernel.h" #include "support/MemorySupport.h" diff --git a/src/runtime/NEON/functions/NEBox3x3.cpp b/src/runtime/NEON/functions/NEBox3x3.cpp index a380377daa..01d2356a4c 100644 --- a/src/runtime/NEON/functions/NEBox3x3.cpp +++ b/src/runtime/NEON/functions/NEBox3x3.cpp @@ -23,14 +23,15 @@ */ #include "arm_compute/runtime/NEON/functions/NEBox3x3.h" -#include "arm_compute/core/NEON/kernels/NEBox3x3Kernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/NEON/kernels/NEBox3x3Kernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; - +namespace arm_compute +{ void NEBox3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value, bool use_fp16) { if(use_fp16) @@ -45,5 +46,8 @@ void NEBox3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode k->configure(input, output, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); } - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NECannyEdge.cpp b/src/runtime/NEON/functions/NECannyEdge.cpp index d7ec52c5ac..bf4f7d7933 100644 --- a/src/runtime/NEON/functions/NECannyEdge.cpp +++ b/src/runtime/NEON/functions/NECannyEdge.cpp @@ -25,8 +25,6 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include 
"arm_compute/runtime/NEON/NEScheduler.h" @@ -34,13 +32,19 @@ #include "arm_compute/runtime/NEON/functions/NESobel5x5.h" #include "arm_compute/runtime/NEON/functions/NESobel7x7.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NECannyEdgeKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NESobel5x5Kernel.h" +#include "src/core/NEON/kernels/NESobel7x7Kernel.h" #include "support/MemorySupport.h" #include #include #include -using namespace arm_compute; +namespace arm_compute +{ +NECannyEdge::~NECannyEdge() = default; NECannyEdge::NECannyEdge(std::shared_ptr memory_manager) // NOLINT : _memory_group(std::move(memory_manager)), @@ -139,21 +143,25 @@ void NECannyEdge::configure(ITensor *input, ITensor *output, int32_t upper_thr, _memory_group.manage(&_nonmax); // Configure non-maxima suppression - _non_max_suppr.configure(&_magnitude, &_phase, &_nonmax, upper_thr, lower_thr, border_mode == BorderMode::UNDEFINED); + _non_max_suppr = arm_compute::support::cpp14::make_unique(); + _non_max_suppr->configure(&_magnitude, &_phase, &_nonmax, upper_thr, lower_thr, border_mode == BorderMode::UNDEFINED); // Fill border around magnitude image as non-maxima suppression will access // it. If border mode is undefined filling the border is a nop. 
- _border_mag_gradient.configure(&_magnitude, _non_max_suppr.border_size(), border_mode, constant_border_value); + _border_mag_gradient = arm_compute::support::cpp14::make_unique(); + _border_mag_gradient->configure(&_magnitude, _non_max_suppr->border_size(), border_mode, constant_border_value); // Allocate intermediate tensors _phase.allocator()->allocate(); _magnitude.allocator()->allocate(); // Configure edge tracing - _edge_trace.configure(&_nonmax, output); + _edge_trace = arm_compute::support::cpp14::make_unique(); + _edge_trace->configure(&_nonmax, output); // Fill border with "No edge" to stop recursion in edge trace - _border_edge_trace.configure(&_nonmax, _edge_trace.border_size(), BorderMode::CONSTANT, static_cast(0.f)); + _border_edge_trace = arm_compute::support::cpp14::make_unique(); + _border_edge_trace->configure(&_nonmax, _edge_trace->border_size(), BorderMode::CONSTANT, static_cast(0.f)); // Allocate intermediate tensors _nonmax.allocator()->allocate(); @@ -172,17 +180,18 @@ void NECannyEdge::run() NEScheduler::get().schedule(_gradient.get(), Window::DimY); // Fill border before non-maxima suppression. Nop for border mode undefined. 
- NEScheduler::get().schedule(&_border_mag_gradient, Window::DimZ); + NEScheduler::get().schedule(_border_mag_gradient.get(), Window::DimZ); // Run non-maxima suppression - NEScheduler::get().schedule(&_non_max_suppr, Window::DimY); + NEScheduler::get().schedule(_non_max_suppr.get(), Window::DimY); ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr); std::fill_n(_output->buffer(), _output->info()->total_size(), 0); // Fill border before edge trace - NEScheduler::get().schedule(&_border_edge_trace, Window::DimZ); + NEScheduler::get().schedule(_border_edge_trace.get(), Window::DimZ); // Run edge tracing - NEScheduler::get().schedule(&_edge_trace, Window::DimY); + NEScheduler::get().schedule(_edge_trace.get(), Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NECast.cpp b/src/runtime/NEON/functions/NECast.cpp index 4b35110417..7fd2605fd2 100644 --- a/src/runtime/NEON/functions/NECast.cpp +++ b/src/runtime/NEON/functions/NECast.cpp @@ -24,8 +24,8 @@ #include "arm_compute/runtime/NEON/functions/NECast.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h" #include "arm_compute/core/TensorInfo.h" +#include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEChannelCombine.cpp b/src/runtime/NEON/functions/NEChannelCombine.cpp index e987951097..f8a9be0313 100644 --- a/src/runtime/NEON/functions/NEChannelCombine.cpp +++ b/src/runtime/NEON/functions/NEChannelCombine.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEChannelCombine.h" -#include "arm_compute/core/NEON/kernels/NEChannelCombineKernel.h" +#include "src/core/NEON/kernels/NEChannelCombineKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEChannelExtract.cpp b/src/runtime/NEON/functions/NEChannelExtract.cpp index d78a8f8301..8f5e4d47d9 100644 --- 
a/src/runtime/NEON/functions/NEChannelExtract.cpp +++ b/src/runtime/NEON/functions/NEChannelExtract.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEChannelExtract.h" -#include "arm_compute/core/NEON/kernels/NEChannelExtractKernel.h" +#include "src/core/NEON/kernels/NEChannelExtractKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEChannelShuffleLayer.cpp b/src/runtime/NEON/functions/NEChannelShuffleLayer.cpp index 0392a92663..c72dec67ee 100644 --- a/src/runtime/NEON/functions/NEChannelShuffleLayer.cpp +++ b/src/runtime/NEON/functions/NEChannelShuffleLayer.cpp @@ -23,8 +23,8 @@ */ #include "arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h" -#include "arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h" #include "arm_compute/core/Types.h" +#include "src/core/NEON/kernels/NEChannelShuffleLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NECol2Im.cpp b/src/runtime/NEON/functions/NECol2Im.cpp index e4fe36fd25..0706125157 100644 --- a/src/runtime/NEON/functions/NECol2Im.cpp +++ b/src/runtime/NEON/functions/NECol2Im.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NECol2Im.h" -#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" +#include "src/core/NEON/kernels/NECol2ImKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEColorConvert.cpp b/src/runtime/NEON/functions/NEColorConvert.cpp index 7befac7aa3..ebdd1046ce 100644 --- a/src/runtime/NEON/functions/NEColorConvert.cpp +++ b/src/runtime/NEON/functions/NEColorConvert.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEColorConvert.h" -#include "arm_compute/core/NEON/kernels/NEColorConvertKernel.h" +#include "src/core/NEON/kernels/NEColorConvertKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEComputeAllAnchors.cpp 
b/src/runtime/NEON/functions/NEComputeAllAnchors.cpp index cb89117ff9..3f5712dd3a 100644 --- a/src/runtime/NEON/functions/NEComputeAllAnchors.cpp +++ b/src/runtime/NEON/functions/NEComputeAllAnchors.cpp @@ -23,6 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h" +#include "src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEConcatenateLayer.cpp b/src/runtime/NEON/functions/NEConcatenateLayer.cpp index 72bd9e6b19..03a01aec6b 100644 --- a/src/runtime/NEON/functions/NEConcatenateLayer.cpp +++ b/src/runtime/NEON/functions/NEConcatenateLayer.cpp @@ -23,10 +23,10 @@ */ #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" -#include "arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" diff --git a/src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp b/src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp index f697efb367..291afe0273 100644 --- a/src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp +++ b/src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,9 +22,13 @@ * SOFTWARE. 
*/ #include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h" +#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NEConvertFullyConnectedWeights::~NEConvertFullyConnectedWeights() = default; + NEConvertFullyConnectedWeights::NEConvertFullyConnectedWeights() : _kernel() { @@ -33,7 +37,8 @@ NEConvertFullyConnectedWeights::NEConvertFullyConnectedWeights() void NEConvertFullyConnectedWeights::configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout) { - _kernel.configure(input, output, original_input_shape, data_layout); + _kernel = arm_compute::support::cpp14::make_unique(); + _kernel->configure(input, output, original_input_shape, data_layout); } Status NEConvertFullyConnectedWeights::validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, @@ -44,6 +49,6 @@ Status NEConvertFullyConnectedWeights::validate(const ITensorInfo *input, const void NEConvertFullyConnectedWeights::run() { - NEScheduler::get().schedule(&_kernel, Window::DimZ); + NEScheduler::get().schedule(_kernel.get(), Window::DimZ); } } // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEConvolution.cpp b/src/runtime/NEON/functions/NEConvolution.cpp index 8200a08ca8..07ac8bd42b 100644 --- a/src/runtime/NEON/functions/NEConvolution.cpp +++ b/src/runtime/NEON/functions/NEConvolution.cpp @@ -25,28 +25,38 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEConvolutionKernel.h" +#include 
"src/core/NEON/kernels/NEConvolutionKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" #include "support/MemorySupport.h" #include #include -using namespace arm_compute; +namespace arm_compute +{ +NEConvolution3x3::~NEConvolution3x3() = default; void NEConvolution3x3::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } +template +NEConvolutionSquare::~NEConvolutionSquare() = default; + template NEConvolutionSquare::NEConvolutionSquare(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler() @@ -66,6 +76,7 @@ void NEConvolutionSquare::configure(ITensor *input, ITensor *output _is_separable = separate_matrix(conv, conv_col.data(), conv_row.data(), matrix_size); + auto b = arm_compute::support::cpp14::make_unique(); if(_is_separable) { DataType intermediate_type = DataType::UNKNOWN; @@ -82,35 +93,40 @@ void NEConvolutionSquare::configure(ITensor *input, ITensor *output scale = calculate_matrix_scale(conv, matrix_size); } - _kernel_hor.configure(input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED); - _kernel_vert.configure(&_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED); + _kernel_hor = arm_compute::support::cpp14::make_unique>(); + _kernel_vert = arm_compute::support::cpp14::make_unique>(); + + _kernel_hor->configure(input, &_tmp, conv_row.data(), border_mode == 
BorderMode::UNDEFINED); + _kernel_vert->configure(&_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED); _tmp.allocator()->allocate(); - _border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value)); + b->configure(input, _kernel_hor->border_size(), border_mode, PixelValue(constant_border_value)); } else { - _kernel.configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED); - _border_handler.configure(input, _kernel.border_size(), border_mode, PixelValue(constant_border_value)); + _kernel = arm_compute::support::cpp14::make_unique>(); + _kernel->configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); } + _border_handler = std::move(b); } template void NEConvolutionSquare::run() { - NEScheduler::get().schedule(&_border_handler, Window::DimZ); + NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); if(_is_separable) { MemoryGroupResourceScope scope_mg(_memory_group); - NEScheduler::get().schedule(&_kernel_hor, Window::DimY); - NEScheduler::get().schedule(&_kernel_vert, Window::DimY); + NEScheduler::get().schedule(_kernel_hor.get(), Window::DimY); + NEScheduler::get().schedule(_kernel_vert.get(), Window::DimY); } else { - NEScheduler::get().schedule(&_kernel, Window::DimY); + NEScheduler::get().schedule(_kernel.get(), Window::DimY); } } @@ -118,10 +134,16 @@ template class arm_compute::NEConvolutionSquare<5>; template class arm_compute::NEConvolutionSquare<7>; template class arm_compute::NEConvolutionSquare<9>; +NEConvolutionRectangle::~NEConvolutionRectangle() = default; + void NEConvolutionRectangle::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output, conv, rows, 
cols, scale, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp index 491425c487..901b1e880e 100644 --- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp @@ -27,6 +27,27 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NECol2ImKernel.h" +#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "src/core/NEON/kernels/NECopyKernel.h" +#include "src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" +#include "src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" +#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h" +#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h" +#include "src/core/NEON/kernels/NEFFTScaleKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "src/core/NEON/kernels/NEIm2ColKernel.h" +#include "src/core/NEON/kernels/NEPadLayerKernel.h" +#include 
"src/core/NEON/kernels/NEReductionOperationKernel.h" +#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NECopy.cpp b/src/runtime/NEON/functions/NECopy.cpp index a461c18894..9e7bf40559 100644 --- a/src/runtime/NEON/functions/NECopy.cpp +++ b/src/runtime/NEON/functions/NECopy.cpp @@ -23,13 +23,15 @@ */ #include "arm_compute/runtime/NEON/functions/NECopy.h" -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" +#include "src/core/NEON/kernels/NECopyKernel.h" #include "support/MemorySupport.h" #include namespace arm_compute { +NECopy::~NECopy() = default; + void NECopy::configure(ITensor *input, ITensor *output) { auto k = arm_compute::support::cpp14::make_unique(); diff --git a/src/runtime/NEON/functions/NECropResize.cpp b/src/runtime/NEON/functions/NECropResize.cpp index f8f99169aa..2e2d2251b6 100644 --- a/src/runtime/NEON/functions/NECropResize.cpp +++ b/src/runtime/NEON/functions/NECropResize.cpp @@ -24,6 +24,7 @@ #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/NEON/functions/NECropResize.h" +#include "src/core/NEON/kernels/NECropKernel.h" #include "support/MemorySupport.h" @@ -31,6 +32,8 @@ namespace arm_compute { +NECropResize::~NECropResize() = default; + NECropResize::NECropResize() : _output(nullptr), _num_boxes(0), _method(), _extrapolation_value(0), _crop(), _scale(), _crop_results(), _scaled_results() { diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp index cb9ab168a7..2b5b0082c4 100644 --- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h" #include 
"src/core/helpers/AutoConfiguration.h" using namespace arm_compute::misc::shape_calculator; diff --git a/src/runtime/NEON/functions/NEDepthConvertLayer.cpp b/src/runtime/NEON/functions/NEDepthConvertLayer.cpp index 1ffcca0d7f..af0f5efb69 100644 --- a/src/runtime/NEON/functions/NEDepthConvertLayer.cpp +++ b/src/runtime/NEON/functions/NEDepthConvertLayer.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h" -#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h" +#include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp b/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp index 0aaa37ec92..c4f15e3b68 100644 --- a/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp +++ b/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h" #include "support/MemorySupport.h" diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp index 6c22523bcb..fc97279211 100644 --- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp @@ -27,6 +27,8 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h" +#include "support/MemorySupport.h" using namespace arm_compute::misc; using namespace arm_compute::misc::shape_calculator; @@ -69,10 +71,11 @@ Status validate_arguments_optimized(const ITensorInfo *input, const ITensorInfo } } // namespace +NEDepthwiseConvolutionLayer::~NEDepthwiseConvolutionLayer() = default; + 
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr memory_manager) - : _memory_group(memory_manager), _dwc_optimized_func(memory_manager), _output_stage_kernel(), _border_handler(), _permute_input(), _permute_weights(), _permute_output(), _activationlayer_function(), - _accumulator(), _permuted_input(), _permuted_weights(), _permuted_output(), _original_weights(nullptr), _has_bias(false), _is_quantized(false), _is_nchw(true), _permute(false), - _is_activationlayer_enabled(false), _is_prepared(false) + : _memory_group(memory_manager), _dwc_optimized_func(memory_manager), _permute_input(), _permute_weights(), _permute_output(), _activationlayer_function(), _accumulator(), _permuted_input(), + _permuted_weights(), _permuted_output(), _original_weights(nullptr), _has_bias(false), _is_quantized(false), _is_nchw(true), _permute(false), _is_activationlayer_enabled(false), _is_prepared(false) { } @@ -243,7 +246,8 @@ void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::configure( } _original_weights = weights_to_use; - _depthwise_conv_kernel.configure(input_to_use, weights_to_use, biases, output_to_use, conv_info, depth_multiplier, dilation); + _depthwise_conv_kernel = arm_compute::support::cpp14::make_unique(); + _depthwise_conv_kernel->configure(input_to_use, weights_to_use, biases, output_to_use, conv_info, depth_multiplier, dilation); if(_is_nchw) { @@ -309,7 +313,7 @@ void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::run() _permute_input.run(); } - NEScheduler::get().schedule(&_depthwise_conv_kernel, Window::DimY); + NEScheduler::get().schedule(_depthwise_conv_kernel.get(), Window::DimY); if(_is_nchw) { diff --git a/src/runtime/NEON/functions/NEDequantizationLayer.cpp b/src/runtime/NEON/functions/NEDequantizationLayer.cpp index a4a3a43b2e..0c0f86c82b 100644 --- a/src/runtime/NEON/functions/NEDequantizationLayer.cpp +++ 
b/src/runtime/NEON/functions/NEDequantizationLayer.cpp @@ -24,7 +24,7 @@ #include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h" -#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h" +#include "src/core/NEON/kernels/NEDequantizationLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEDerivative.cpp b/src/runtime/NEON/functions/NEDerivative.cpp index 24991400b8..f007e9fda3 100644 --- a/src/runtime/NEON/functions/NEDerivative.cpp +++ b/src/runtime/NEON/functions/NEDerivative.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,12 +24,16 @@ #include "arm_compute/runtime/NEON/functions/NEDerivative.h" #include "arm_compute/core/Error.h" -#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEDerivativeKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEDerivative::~NEDerivative() = default; NEDerivative::NEDerivative() : _kernel(), _border_handler() @@ -41,12 +45,16 @@ void NEDerivative::configure(ITensor *input, ITensor *output_x, ITensor *output_ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); - _kernel.configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED); - _border_handler.configure(input, BorderSize(1), border_mode, PixelValue(constant_border_value)); + _kernel = arm_compute::support::cpp14::make_unique(); + _border_handler = arm_compute::support::cpp14::make_unique(); + + _kernel->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED); + _border_handler->configure(input, 
BorderSize(1), border_mode, PixelValue(constant_border_value)); } void NEDerivative::run() { - NEScheduler::get().schedule(&_border_handler, Window::DimZ); - NEScheduler::get().schedule(&_kernel, Window::DimY); + NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); + NEScheduler::get().schedule(_kernel.get(), Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEDilate.cpp b/src/runtime/NEON/functions/NEDilate.cpp index 7f503865b4..70c0b61639 100644 --- a/src/runtime/NEON/functions/NEDilate.cpp +++ b/src/runtime/NEON/functions/NEDilate.cpp @@ -23,8 +23,9 @@ */ #include "arm_compute/runtime/NEON/functions/NEDilate.h" -#include "arm_compute/core/NEON/kernels/NEDilateKernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/NEON/kernels/NEDilateKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" #include "support/MemorySupport.h" #include @@ -36,5 +37,8 @@ void NEDilate::configure(ITensor *input, ITensor *output, BorderMode border_mode auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp index fe545905d5..98d6386ffe 100644 --- a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp @@ -27,9 +27,15 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" +#include 
"src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NEDirectConvolutionLayer::~NEDirectConvolutionLayer() = default; + NEDirectConvolutionLayer::NEDirectConvolutionLayer(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _output_stage_kernel(), _conv_kernel(), _input_border_handler(), _activationlayer_function(), _accumulator(), _has_bias(false), _is_activationlayer_enabled(false), _dim_split(Window::DimZ), _is_padding_required() @@ -39,6 +45,9 @@ NEDirectConvolutionLayer::NEDirectConvolutionLayer(std::shared_ptrinfo()->data_layout() == DataLayout::UNKNOWN); + _output_stage_kernel = arm_compute::support::cpp14::make_unique(); + _conv_kernel = arm_compute::support::cpp14::make_unique(); + _input_border_handler = arm_compute::support::cpp14::make_unique(); // Free accumulator if(_accumulator.buffer() != nullptr) @@ -51,17 +60,17 @@ void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights, // Check if bias should be added in the convolution result _has_bias = (bias != nullptr); - _conv_kernel.configure(input, weights, output, conv_info); + _conv_kernel->configure(input, weights, output, conv_info); if(_has_bias) { - _output_stage_kernel.configure(output, bias); + _output_stage_kernel->configure(output, bias); } - _is_padding_required = !_conv_kernel.border_size().empty(); + _is_padding_required = !_conv_kernel->border_size().empty(); if(_is_padding_required) { // Add zero padding XY - _input_border_handler.configure(input, _conv_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast(0.f))); + _input_border_handler->configure(input, _conv_kernel->border_size(), BorderMode::CONSTANT, PixelValue(static_cast(0.f))); } //Configure Activation Layer @@ -109,12 +118,12 @@ void NEDirectConvolutionLayer::run() if(_is_padding_required) { - NEScheduler::get().schedule(&_input_border_handler, 
Window::DimZ); + NEScheduler::get().schedule(_input_border_handler.get(), Window::DimZ); } - NEScheduler::get().schedule(&_conv_kernel, _dim_split); + NEScheduler::get().schedule(_conv_kernel.get(), _dim_split); if(_has_bias) { - NEScheduler::get().schedule(&_output_stage_kernel, Window::DimY); + NEScheduler::get().schedule(_output_stage_kernel.get(), Window::DimY); } if(_is_activationlayer_enabled) diff --git a/src/runtime/NEON/functions/NEElementwiseOperators.cpp b/src/runtime/NEON/functions/NEElementwiseOperators.cpp index d1f60c71e1..7f3fe8b30b 100644 --- a/src/runtime/NEON/functions/NEElementwiseOperators.cpp +++ b/src/runtime/NEON/functions/NEElementwiseOperators.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h" -#include +#include #include "arm_compute/core/ITensor.h" #include "support/MemorySupport.h" diff --git a/src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp b/src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp index cb4e3a0b7d..5e130205d2 100644 --- a/src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp +++ b/src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h" -#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h" +#include "src/core/NEON/kernels/NEElementwiseUnaryKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEEqualizeHistogram.cpp b/src/runtime/NEON/functions/NEEqualizeHistogram.cpp index b3d5ad484f..d3ff171323 100644 --- a/src/runtime/NEON/functions/NEEqualizeHistogram.cpp +++ b/src/runtime/NEON/functions/NEEqualizeHistogram.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -28,8 +28,15 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NECumulativeDistributionKernel.h" +#include "src/core/NEON/kernels/NEHistogramKernel.h" +#include "src/core/NEON/kernels/NEHistogramKernel.h" +#include "src/core/NEON/kernels/NETableLookupKernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEEqualizeHistogram::~NEEqualizeHistogram() = default; NEEqualizeHistogram::NEEqualizeHistogram() : _histogram_kernel(), _cd_histogram_kernel(), _map_histogram_kernel(), _hist(nr_bins, 0, max_range), _cum_dist(nr_bins, 0, max_range), _cd_lut(nr_bins, DataType::U8) @@ -43,20 +50,25 @@ void NEEqualizeHistogram::configure(const IImage *input, IImage *output) ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); + _histogram_kernel = arm_compute::support::cpp14::make_unique(); + _cd_histogram_kernel = arm_compute::support::cpp14::make_unique(); + _map_histogram_kernel = arm_compute::support::cpp14::make_unique(); + // Configure kernels - _histogram_kernel.configure(input, &_hist); - _cd_histogram_kernel.configure(input, &_hist, &_cum_dist, &_cd_lut); - _map_histogram_kernel.configure(input, &_cd_lut, output); + _histogram_kernel->configure(input, &_hist); + _cd_histogram_kernel->configure(input, &_hist, &_cum_dist, &_cd_lut); + _map_histogram_kernel->configure(input, &_cd_lut, output); } void NEEqualizeHistogram::run() { // Calculate histogram of input. - NEScheduler::get().schedule(&_histogram_kernel, Window::DimY); + NEScheduler::get().schedule(_histogram_kernel.get(), Window::DimY); // Calculate cumulative distribution of histogram and create LUT. 
- NEScheduler::get().schedule(&_cd_histogram_kernel, Window::DimY); + NEScheduler::get().schedule(_cd_histogram_kernel.get(), Window::DimY); // Map input to output using created LUT. - NEScheduler::get().schedule(&_map_histogram_kernel, Window::DimY); + NEScheduler::get().schedule(_map_histogram_kernel.get(), Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEErode.cpp b/src/runtime/NEON/functions/NEErode.cpp index a89993c1fe..748694fe3f 100644 --- a/src/runtime/NEON/functions/NEErode.cpp +++ b/src/runtime/NEON/functions/NEErode.cpp @@ -23,18 +23,23 @@ */ #include "arm_compute/runtime/NEON/functions/NEErode.h" -#include "arm_compute/core/NEON/kernels/NEErodeKernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/NEON/kernels/NEErodeKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; - +namespace arm_compute +{ void NEErode::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } +} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEFFT1D.cpp b/src/runtime/NEON/functions/NEFFT1D.cpp index 2c53b185df..b94c25832a 100644 --- a/src/runtime/NEON/functions/NEFFT1D.cpp +++ b/src/runtime/NEON/functions/NEFFT1D.cpp @@ -26,10 +26,16 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h" 
+#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h" +#include "src/core/NEON/kernels/NEFFTScaleKernel.h" #include "src/core/utils/helpers/fft.h" +#include "support/MemorySupport.h" namespace arm_compute { +NEFFT1D::~NEFFT1D() = default; + NEFFT1D::NEFFT1D(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _digit_reverse_kernel(), _fft_kernels(), _scale_kernel(), _digit_reversed_input(), _digit_reverse_indices(), _num_ffts(0), _axis(0), _run_scale(false) { @@ -58,7 +64,8 @@ void NEFFT1D::configure(const ITensor *input, ITensor *output, const FFT1DInfo & TensorInfo digit_reverse_indices_info(TensorShape(input->info()->tensor_shape()[config.axis]), 1, DataType::U32); _digit_reverse_indices.allocator()->init(digit_reverse_indices_info); _memory_group.manage(&_digit_reversed_input); - _digit_reverse_kernel.configure(input, &_digit_reversed_input, &_digit_reverse_indices, digit_reverse_config); + _digit_reverse_kernel = arm_compute::support::cpp14::make_unique(); + _digit_reverse_kernel->configure(input, &_digit_reversed_input, &_digit_reverse_indices, digit_reverse_config); // Create and configure FFT kernels unsigned int Nx = 1; @@ -75,7 +82,8 @@ void NEFFT1D::configure(const ITensor *input, ITensor *output, const FFT1DInfo & fft_kernel_info.radix = radix_for_stage; fft_kernel_info.Nx = Nx; fft_kernel_info.is_first_stage = (i == 0); - _fft_kernels[i].configure(&_digit_reversed_input, ((i == (_num_ffts - 1)) && !is_c2r) ? output : nullptr, fft_kernel_info); + _fft_kernels[i] = arm_compute::support::cpp14::make_unique(); + _fft_kernels[i]->configure(&_digit_reversed_input, ((i == (_num_ffts - 1)) && !is_c2r) ? output : nullptr, fft_kernel_info); Nx *= radix_for_stage; } @@ -86,7 +94,8 @@ void NEFFT1D::configure(const ITensor *input, ITensor *output, const FFT1DInfo & FFTScaleKernelInfo scale_config; scale_config.scale = static_cast(N); scale_config.conjugate = config.direction == FFTDirection::Inverse; - is_c2r ? 
_scale_kernel.configure(&_digit_reversed_input, output, scale_config) : _scale_kernel.configure(output, nullptr, scale_config); + _scale_kernel = arm_compute::support::cpp14::make_unique(); + is_c2r ? _scale_kernel->configure(&_digit_reversed_input, output, scale_config) : _scale_kernel->configure(output, nullptr, scale_config); } // Allocate tensors @@ -128,17 +137,17 @@ void NEFFT1D::run() { MemoryGroupResourceScope scope_mg(_memory_group); - NEScheduler::get().schedule(&_digit_reverse_kernel, (_axis == 0 ? Window::DimY : Window::DimZ)); + NEScheduler::get().schedule(_digit_reverse_kernel.get(), (_axis == 0 ? Window::DimY : Window::DimZ)); for(unsigned int i = 0; i < _num_ffts; ++i) { - NEScheduler::get().schedule(&_fft_kernels[i], (_axis == 0 ? Window::DimY : Window::DimX)); + NEScheduler::get().schedule(_fft_kernels[i].get(), (_axis == 0 ? Window::DimY : Window::DimX)); } // Run output scaling if(_run_scale) { - NEScheduler::get().schedule(&_scale_kernel, Window::DimY); + NEScheduler::get().schedule(_scale_kernel.get(), Window::DimY); } } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEFFT2D.cpp b/src/runtime/NEON/functions/NEFFT2D.cpp index b63afe59c0..3b787cd523 100644 --- a/src/runtime/NEON/functions/NEFFT2D.cpp +++ b/src/runtime/NEON/functions/NEFFT2D.cpp @@ -26,9 +26,14 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/Scheduler.h" +#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h" +#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h" +#include "src/core/NEON/kernels/NEFFTScaleKernel.h" namespace arm_compute { +NEFFT2D::~NEFFT2D() = default; + NEFFT2D::NEFFT2D(std::shared_ptr memory_manager) : _memory_group(memory_manager), _first_pass_func(memory_manager), _second_pass_func(memory_manager), _first_pass_tensor() { diff --git a/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp b/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp index a46fc9f45f..23788b7c39 
100644 --- a/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp @@ -27,6 +27,12 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/NEON/kernels/NECopyKernel.h" +#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h" +#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h" +#include "src/core/NEON/kernels/NEFFTScaleKernel.h" +#include "src/core/NEON/kernels/NEPadLayerKernel.h" +#include "src/core/NEON/kernels/NEReductionOperationKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/utils/helpers/fft.h" @@ -96,6 +102,7 @@ NEFFTConvolutionLayer::NEFFTConvolutionLayer(std::shared_ptr mem _is_prepared(false) { } +NEFFTConvolutionLayer::~NEFFTConvolutionLayer() = default; void NEFFTConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) diff --git a/src/runtime/NEON/functions/NEFastCorners.cpp b/src/runtime/NEON/functions/NEFastCorners.cpp index 303c593f84..1bde3cc508 100644 --- a/src/runtime/NEON/functions/NEFastCorners.cpp +++ b/src/runtime/NEON/functions/NEFastCorners.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,15 +25,21 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/Array.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEFastCornersKernel.h" +#include "src/core/NEON/kernels/NEFillArrayKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEFastCorners::~NEFastCorners() = default; NEFastCorners::NEFastCorners(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), @@ -62,24 +68,28 @@ void NEFastCorners::configure(IImage *input, float threshold, bool nonmax_suppre _output.allocator()->init(tensor_info); _memory_group.manage(&_output); + _fast_corners_kernel = arm_compute::support::cpp14::make_unique(); + _border_handler = arm_compute::support::cpp14::make_unique(); + _fill_kernel = arm_compute::support::cpp14::make_unique(); // If border is UNDEFINED _fast_corners_kernel will operate in xwindow (3, // width - 3) and ywindow (3, height -3) so the output image will leave the // pixels on the borders unchanged. This is reflected in the valid region // of the output. The non maxima suppression is only run on the valid // pixels. 
- _fast_corners_kernel.configure(input, &_output, threshold, nonmax_suppression, BorderMode::UNDEFINED == border_mode); - _border_handler.configure(input, _fast_corners_kernel.border_size(), border_mode, constant_border_value); + _fast_corners_kernel->configure(input, &_output, threshold, nonmax_suppression, BorderMode::UNDEFINED == border_mode); + _border_handler->configure(input, _fast_corners_kernel->border_size(), border_mode, constant_border_value); if(!_non_max) { - _fill_kernel.configure(&_output, 1 /* we keep all texels >0 */, corners); + _fill_kernel->configure(&_output, 1 /* we keep all texels >0 */, corners); } else { _suppressed.allocator()->init(tensor_info); _memory_group.manage(&_suppressed); - _nonmax_kernel.configure(&_output, &_suppressed, BorderMode::UNDEFINED == border_mode); - _fill_kernel.configure(&_suppressed, 1 /* we keep all texels >0 */, corners); + _nonmax_kernel = arm_compute::support::cpp14::make_unique(); + _nonmax_kernel->configure(&_output, &_suppressed, BorderMode::UNDEFINED == border_mode); + _fill_kernel->configure(&_suppressed, 1 /* we keep all texels >0 */, corners); // Allocate intermediate tensors _suppressed.allocator()->allocate(); @@ -91,16 +101,17 @@ void NEFastCorners::configure(IImage *input, float threshold, bool nonmax_suppre void NEFastCorners::run() { - NEScheduler::get().schedule(&_border_handler, Window::DimZ); + NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); MemoryGroupResourceScope scope_mg(_memory_group); - NEScheduler::get().schedule(&_fast_corners_kernel, Window::DimY); + NEScheduler::get().schedule(_fast_corners_kernel.get(), Window::DimY); if(_non_max) { - NEScheduler::get().schedule(&_nonmax_kernel, Window::DimY); + NEScheduler::get().schedule(_nonmax_kernel.get(), Window::DimY); } - NEScheduler::get().schedule(&_fill_kernel, Window::DimY); + NEScheduler::get().schedule(_fill_kernel.get(), Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEFill.cpp 
b/src/runtime/NEON/functions/NEFill.cpp index 79fe175e69..68292c9ee0 100644 --- a/src/runtime/NEON/functions/NEFill.cpp +++ b/src/runtime/NEON/functions/NEFill.cpp @@ -25,6 +25,7 @@ #include "arm_compute/core/Window.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEMemsetKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEFillBorder.cpp b/src/runtime/NEON/functions/NEFillBorder.cpp index de2ef26b80..e96069f97c 100644 --- a/src/runtime/NEON/functions/NEFillBorder.cpp +++ b/src/runtime/NEON/functions/NEFillBorder.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,16 +25,19 @@ #include "arm_compute/core/Window.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { void NEFillBorder::configure(ITensor *input, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value) { - _border_handler.configure(input, BorderSize(border_width), border_mode, constant_border_value); + _border_handler = arm_compute::support::cpp14::make_unique(); + _border_handler->configure(input, BorderSize(border_width), border_mode, constant_border_value); } void NEFillBorder::run() { - NEScheduler::get().schedule(&_border_handler, Window::DimZ); + NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); } } // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEFlattenLayer.cpp b/src/runtime/NEON/functions/NEFlattenLayer.cpp index 936a70dacc..4dfe96325e 100644 --- a/src/runtime/NEON/functions/NEFlattenLayer.cpp +++ b/src/runtime/NEON/functions/NEFlattenLayer.cpp @@ -23,8 +23,8 @@ */ #include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h" -#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h" #include 
"arm_compute/core/Size2D.h" +#include "src/core/NEON/kernels/NEFlattenLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEFloor.cpp b/src/runtime/NEON/functions/NEFloor.cpp index 95b2497ded..5f6bd61017 100644 --- a/src/runtime/NEON/functions/NEFloor.cpp +++ b/src/runtime/NEON/functions/NEFloor.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEFloor.h" -#include "arm_compute/core/NEON/kernels/NEFloorKernel.h" +#include "src/core/NEON/kernels/NEFloorKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp index d956d16f4d..714fa58a66 100644 --- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp @@ -29,6 +29,19 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" +#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "src/core/NEON/kernels/NEFlattenLayerKernel.h" +#include "src/core/NEON/kernels/NEFlattenLayerKernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "src/core/NEON/kernels/NETransposeKernel.h" #include "support/MemorySupport.h" @@ -145,6 +158,8 @@ Status 
NEFullyConnectedLayerReshapeWeights::validate(const ITensorInfo *input, c return NETransposeKernel::validate(input, output); } +NEFullyConnectedLayer::~NEFullyConnectedLayer() = default; + NEFullyConnectedLayer::NEFullyConnectedLayer(std::shared_ptr memory_manager, IWeightsManager *weights_manager) : _memory_group(std::move(memory_manager)), _weights_manager(weights_manager), _flatten_kernel(), _convert_weights(), _convert_weights_managed(), _reshape_weights_function(), _reshape_weights_managed_function(), _mm_gemm(nullptr, weights_manager), _mm_gemmlowp(nullptr, weights_manager), _flatten_output(), _converted_weights_output(), _reshape_weights_output(), @@ -199,7 +214,9 @@ void NEFullyConnectedLayer::configure_conv_fc(const ITensor *input, const ITenso // Configure flatten kernel _memory_group.manage(&_flatten_output); - _flatten_kernel.configure(input, &_flatten_output); + + _flatten_kernel = arm_compute::support::cpp14::make_unique(); + _flatten_kernel->configure(input, &_flatten_output); // Configure matrix multiply kernel configure_mm(&_flatten_output, weights, biases, output, act); @@ -398,7 +415,7 @@ void NEFullyConnectedLayer::run() // Linearize input if it comes from a convolutional layer if(_is_fc_after_conv) { - NEScheduler::get().schedule(&_flatten_kernel, Window::DimY); + NEScheduler::get().schedule(_flatten_kernel.get(), Window::DimY); } // Run matrix multiply diff --git a/src/runtime/NEON/functions/NEFuseBatchNormalization.cpp b/src/runtime/NEON/functions/NEFuseBatchNormalization.cpp index fd26bb49a7..c64fde050e 100644 --- a/src/runtime/NEON/functions/NEFuseBatchNormalization.cpp +++ b/src/runtime/NEON/functions/NEFuseBatchNormalization.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -28,9 +28,13 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEFuseBatchNormalizationKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NEFuseBatchNormalization::~NEFuseBatchNormalization() = default; + NEFuseBatchNormalization::NEFuseBatchNormalization() : _fuse_bn_kernel() { @@ -41,7 +45,8 @@ void NEFuseBatchNormalization::configure(const ITensor *input_weights, const ITe const ITensor *input_bias, const ITensor *bn_beta, const ITensor *bn_gamma, float epsilon, FuseBatchNormalizationType fbn_type) { - _fuse_bn_kernel.configure(input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type); + _fuse_bn_kernel = arm_compute::support::cpp14::make_unique(); + _fuse_bn_kernel->configure(input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type); } Status NEFuseBatchNormalization::validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var, @@ -54,6 +59,6 @@ Status NEFuseBatchNormalization::validate(const ITensorInfo *input_weights, cons void NEFuseBatchNormalization::run() { - NEScheduler::get().schedule(&_fuse_bn_kernel, Window::DimY); + NEScheduler::get().schedule(_fuse_bn_kernel.get(), Window::DimY); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp index 4166cff97a..0215098792 100644 --- a/src/runtime/NEON/functions/NEGEMM.cpp +++ b/src/runtime/NEON/functions/NEGEMM.cpp @@ -34,7 +34,12 @@ #include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/TensorAllocator.h" #include "src/core/CPP/Validate.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include 
"src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "src/core/helpers/AutoConfiguration.h" +#include "support/MemorySupport.h" #include @@ -42,6 +47,8 @@ using namespace arm_compute::misc::shape_calculator; namespace arm_compute { +NEGEMM::~NEGEMM() = default; + NEGEMM::NEGEMM(std::shared_ptr memory_manager, IWeightsManager *weights_manager) : _memory_group(memory_manager), _weights_manager(weights_manager), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _asm_glue(memory_manager, weights_manager), _ma_kernel(), _alpha_scale_func(nullptr), _add_bias(), _activation_func(), _tmp_a(), _tmp_b(), _tmp_d(), _original_b(nullptr), _run_vector_matrix_multiplication(false), _run_alpha_scale(false), @@ -88,11 +95,13 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe _memory_group.manage(&_tmp_d); } + _mm_kernel = arm_compute::support::cpp14::make_unique(); + // Select between GEMV and GEMM if(_run_vector_matrix_multiplication) { // Configure the matrix multiply kernel - _mm_kernel.configure(a, b, gemm_output_to_use, alpha, false); + _mm_kernel->configure(a, b, gemm_output_to_use, alpha, false); } else { @@ -124,13 +133,15 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe int k = a->info()->dimension(0); // Configure interleave kernel - _interleave_kernel.configure(a, &_tmp_a); + _interleave_kernel = arm_compute::support::cpp14::make_unique(); + _interleave_kernel->configure(a, &_tmp_a); // Configure transpose kernel - _transpose_kernel.configure(b, &_tmp_b); + _transpose_kernel = arm_compute::support::cpp14::make_unique(); + _transpose_kernel->configure(b, &_tmp_b); // Configure matrix multiplication kernel - _mm_kernel.configure(&_tmp_a, &_tmp_b, gemm_output_to_use, alpha, true, GEMMReshapeInfo(m, n, k)); + _mm_kernel->configure(&_tmp_a, &_tmp_b, gemm_output_to_use, alpha, true, GEMMReshapeInfo(m, n, k)); // Allocate once the all 
configure methods have been called _tmp_a.allocator()->allocate(); @@ -150,7 +161,8 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe // Configure matrix addition kernel if(_run_addition) { - _ma_kernel.configure(c, d, beta); + _ma_kernel = arm_compute::support::cpp14::make_unique(); + _ma_kernel->configure(c, d, beta); } // Configure activation @@ -298,16 +310,16 @@ void NEGEMM::run() if(!_run_vector_matrix_multiplication) { // Run interleave kernel - NEScheduler::get().schedule(&_interleave_kernel, Window::DimY); + NEScheduler::get().schedule(_interleave_kernel.get(), Window::DimY); if(!_reshape_b_only_on_first_run) { // Run transpose kernel - NEScheduler::get().schedule(&_transpose_kernel, Window::DimY); + NEScheduler::get().schedule(_transpose_kernel.get(), Window::DimY); } } - NEScheduler::get().schedule(&_mm_kernel, _run_vector_matrix_multiplication ? Window::DimX : Window::DimY); + NEScheduler::get().schedule(_mm_kernel.get(), _run_vector_matrix_multiplication ? 
Window::DimX : Window::DimY); // Run bias addition kernel if(_run_bias_addition) @@ -319,7 +331,7 @@ void NEGEMM::run() // Run matrix addition kernel if(_run_addition) { - NEScheduler::get().schedule(&_ma_kernel, Window::DimY); + NEScheduler::get().schedule(_ma_kernel.get(), Window::DimY); } // Run activation function @@ -355,7 +367,7 @@ void NEGEMM::prepare() } _tmp_b.allocator()->allocate(); - NEScheduler::get().schedule(&_transpose_kernel, Window::DimY); + NEScheduler::get().schedule(_transpose_kernel.get(), Window::DimY); if(!original_b_managed_by_weights_manager) { _original_b->mark_as_unused(); diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp index 834a66a867..3f50f81af2 100644 --- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp @@ -30,6 +30,21 @@ #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NECol2ImKernel.h" +#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "src/core/NEON/kernels/NEIm2ColKernel.h" +#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h" +#include "support/MemorySupport.h" + #include #include @@ -37,6 +52,7 @@ namespace arm_compute { using namespace arm_compute::misc::shape_calculator; 
+NEConvolutionLayerReshapeWeights::~NEConvolutionLayerReshapeWeights() = default; NEConvolutionLayerReshapeWeights::NEConvolutionLayerReshapeWeights() : _weights_reshape_kernel() { @@ -52,7 +68,8 @@ void NEConvolutionLayerReshapeWeights::configure(const ITensor *weights, const I const bool append_biases = (biases != nullptr) && !is_data_type_quantized_asymmetric(weights->info()->data_type()); const ITensor *biases_to_use = (append_biases) ? biases : nullptr; - _weights_reshape_kernel.configure(weights, biases_to_use, output); + _weights_reshape_kernel = arm_compute::support::cpp14::make_unique(); + _weights_reshape_kernel->configure(weights, biases_to_use, output); output->info()->set_quantization_info(weights->info()->quantization_info()); } @@ -86,9 +103,11 @@ Status NEConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, co void NEConvolutionLayerReshapeWeights::run() { - NEScheduler::get().schedule(&_weights_reshape_kernel, 3); + NEScheduler::get().schedule(_weights_reshape_kernel.get(), 3); } +NEGEMMConvolutionLayer::~NEGEMMConvolutionLayer() = default; + NEGEMMConvolutionLayer::NEGEMMConvolutionLayer(const std::shared_ptr &memory_manager, IWeightsManager *weights_manager) : _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager), _col2im_kernel(), _reshape_layer(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _data_layout(DataLayout::NCHW), _skip_im2col(false), @@ -323,7 +342,8 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig _memory_group.manage(&_im2col_output); // Configure - _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, false, dilation); + _im2col_kernel = arm_compute::support::cpp14::make_unique(); + _im2col_kernel->configure(input, &_im2col_output, Size2D(kernel_width, 
kernel_height), conv_info, false, dilation); // Update GEMM input gemm_input_to_use = &_im2col_output; @@ -365,7 +385,8 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig if(_data_layout == DataLayout::NCHW) { // Configure col2im - _col2im_kernel.configure(gemm_output_to_use, output, Size2D(conv_w, conv_h)); + _col2im_kernel = arm_compute::support::cpp14::make_unique(); + _col2im_kernel->configure(gemm_output_to_use, output, Size2D(conv_w, conv_h)); } else { @@ -538,7 +559,7 @@ void NEGEMMConvolutionLayer::run() { // Run input reshaping unsigned int y_dim = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT); - NEScheduler::get().schedule(&_im2col_kernel, y_dim); + NEScheduler::get().schedule(_im2col_kernel.get(), y_dim); } // Runs NEGEMM or NEGEMMLowpMatrixMultiplyCore functions @@ -558,7 +579,7 @@ void NEGEMMConvolutionLayer::run() { if(_data_layout == DataLayout::NCHW) { - NEScheduler::get().schedule(&_col2im_kernel, Window::DimY); + NEScheduler::get().schedule(_col2im_kernel.get(), Window::DimY); } else { diff --git a/src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp b/src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp index ad306c3662..70fdcf492d 100644 --- a/src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp +++ b/src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h" -#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp index 6d52f2b15c..09637dd2d6 100644 --- a/src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp +++ b/src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp @@ -26,17 +26,19 @@ #include 
"arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEGEMMLowpAssemblyMatrixMultiplyCore::~NEGEMMLowpAssemblyMatrixMultiplyCore() = default; NEGEMMLowpAssemblyMatrixMultiplyCore::NEGEMMLowpAssemblyMatrixMultiplyCore(std::shared_ptr memory_manager) : _memory_group(memory_manager), _asm_glue(memory_manager), _mm_kernel(nullptr), _mtx_a_reshape_kernel(nullptr), _mtx_b_reshape_kernel(nullptr), _tmp_a(), _tmp_b() @@ -137,3 +139,4 @@ void NEGEMMLowpAssemblyMatrixMultiplyCore::run() NEScheduler::get().schedule(_mm_kernel.get(), Window::DimY); } } +} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp index 36357dde41..9050427b34 100644 --- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp @@ -34,12 +34,23 @@ #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/TensorAllocator.h" #include "src/core/helpers/AutoConfiguration.h" + +#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include 
"src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" + #include "support/MemorySupport.h" namespace arm_compute { using namespace arm_compute::misc::shape_calculator; +NEGEMMLowpMatrixMultiplyCore::~NEGEMMLowpMatrixMultiplyCore() = default; + NEGEMMLowpMatrixMultiplyCore::NEGEMMLowpMatrixMultiplyCore(std::shared_ptr memory_manager, IWeightsManager *weights_manager) : _memory_group(memory_manager), _weights_manager(weights_manager), _asm_glue(memory_manager, weights_manager), _mm_kernel(), _mtx_a_reshape_kernel(), _mtx_b_reshape_kernel(), _mtx_a_reduction_kernel(), _mtx_b_reduction_kernel(), _offset_contribution_kernel(), _offset_contribution_output_stage_kernel(), _activation_func(), _convert_to_signed_asymm(), @@ -80,7 +91,8 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, _signed_a.allocator()->init(a_to_use->info()->clone()->set_data_type(dt).set_quantization_info(QuantizationInfo(iqinfo.scale, iqinfo.offset + offset_correction))); _memory_group.manage(&_signed_a); - _convert_to_signed_asymm.configure(a_to_use, &_signed_a); + _convert_to_signed_asymm = arm_compute::support::cpp14::make_unique(); + _convert_to_signed_asymm->configure(a_to_use, &_signed_a); a_to_use = &_signed_a; _a_offset = _signed_a.info()->quantization_info().uniform().offset; @@ -153,10 +165,12 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, } // Configure interleave kernel - _mtx_a_reshape_kernel.configure(a_to_use, &_tmp_a); + _mtx_a_reshape_kernel = arm_compute::support::cpp14::make_unique(); + _mtx_a_reshape_kernel->configure(a_to_use, &_tmp_a); // Configure transpose kernel - _mtx_b_reshape_kernel.configure(b, &_tmp_b); + _mtx_b_reshape_kernel 
= arm_compute::support::cpp14::make_unique(); + _mtx_b_reshape_kernel->configure(b, &_tmp_b); } if(!_fused_assembly_path) @@ -176,7 +190,8 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, } // Configure Matrix B reduction kernel - _mtx_b_reduction_kernel.configure(b, &_vector_sum_col, reduction_info); + _mtx_b_reduction_kernel = arm_compute::support::cpp14::make_unique(); + _mtx_b_reduction_kernel->configure(b, &_vector_sum_col, reduction_info); } // Initialize Matrix A reduction kernel only if _b_offset is not equal to 0 @@ -188,7 +203,8 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, _memory_group.manage(&_vector_sum_row); // Configure matrix A reduction kernel - _mtx_a_reduction_kernel.configure(a_to_use, &_vector_sum_row, reduction_info); + _mtx_a_reduction_kernel = arm_compute::support::cpp14::make_unique(); + _mtx_a_reduction_kernel->configure(a_to_use, &_vector_sum_row, reduction_info); } if(_fuse_output_stage) @@ -196,19 +212,22 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, // Configure matrix multiply kernel if(!_assembly_path) { - _mm_kernel.configure(matrix_a, matrix_b, &_mm_result_s32); + _mm_kernel = arm_compute::support::cpp14::make_unique(); + _mm_kernel->configure(matrix_a, matrix_b, &_mm_result_s32); } - _offset_contribution_output_stage_kernel.configure(&_mm_result_s32, - _a_offset == 0 ? nullptr : &_vector_sum_col, - _b_offset == 0 ? nullptr : &_vector_sum_row, c, - _flip_signedness ? &_signed_output : output, - a->info()->dimension(0), - _a_offset, _b_offset, info.gemmlowp_output_stage()); + _offset_contribution_output_stage_kernel = arm_compute::support::cpp14::make_unique(); + _offset_contribution_output_stage_kernel->configure(&_mm_result_s32, + _a_offset == 0 ? nullptr : &_vector_sum_col, + _b_offset == 0 ? nullptr : &_vector_sum_row, c, + _flip_signedness ? 
&_signed_output : output, + a->info()->dimension(0), + _a_offset, _b_offset, info.gemmlowp_output_stage()); if(_flip_signedness) { - _convert_from_signed_asymm.configure(&_signed_output, output); + _convert_from_signed_asymm = arm_compute::support::cpp14::make_unique(); + _convert_from_signed_asymm->configure(&_signed_output, output); } } else @@ -216,10 +235,12 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, // Configure matrix multiply kernel if(!_assembly_path) { - _mm_kernel.configure(matrix_a, matrix_b, output); + _mm_kernel = arm_compute::support::cpp14::make_unique(); + _mm_kernel->configure(matrix_a, matrix_b, output); } // Configure offset contribution kernel - _offset_contribution_kernel.configure(output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, a_to_use->info()->dimension(0), _a_offset, _b_offset); + _offset_contribution_kernel = arm_compute::support::cpp14::make_unique(); + _offset_contribution_kernel->configure(output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? 
nullptr : &_vector_sum_row, a_to_use->info()->dimension(0), _a_offset, _b_offset); } // Configure activation @@ -468,7 +489,7 @@ void NEGEMMLowpMatrixMultiplyCore::run() // Convert QASYMM8->QASYMM8_SIGNED if(_flip_signedness) { - NEScheduler::get().schedule(&_convert_to_signed_asymm, Window::DimY); + NEScheduler::get().schedule(_convert_to_signed_asymm.get(), Window::DimY); } // Run GEMM @@ -481,15 +502,15 @@ void NEGEMMLowpMatrixMultiplyCore::run() if(!_run_vector_matrix_multiplication) { // Run interleave kernel - NEScheduler::get().schedule(&_mtx_a_reshape_kernel, Window::DimY); + NEScheduler::get().schedule(_mtx_a_reshape_kernel.get(), Window::DimY); if(!_reshape_b_only_on_first_run) { // Run transpose kernel - NEScheduler::get().schedule(&_mtx_b_reshape_kernel, Window::DimY); + NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY); } } - NEScheduler::get().schedule(&_mm_kernel, Window::DimY); + NEScheduler::get().schedule(_mm_kernel.get(), Window::DimY); } if(!_fused_assembly_path) @@ -497,31 +518,31 @@ void NEGEMMLowpMatrixMultiplyCore::run() // Run matrix A reduction kernel only if _b_offset is not equal to 0 if(_b_offset != 0) { - NEScheduler::get().schedule(&_mtx_a_reduction_kernel, Window::DimX); + NEScheduler::get().schedule(_mtx_a_reduction_kernel.get(), Window::DimX); } // Run matrix B reduction kernel only if _a_offset is not equal to 0 if(_a_offset != 0 && !_reshape_b_only_on_first_run) { - NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX); + NEScheduler::get().schedule(_mtx_b_reduction_kernel.get(), Window::DimX); } if(_fuse_output_stage) { // Run offset contribution kernel - NEScheduler::get().schedule(&_offset_contribution_output_stage_kernel, Window::DimY); + NEScheduler::get().schedule(_offset_contribution_output_stage_kernel.get(), Window::DimY); } else { // Run offset contribution kernel - NEScheduler::get().schedule(&_offset_contribution_kernel, Window::DimY); + 
NEScheduler::get().schedule(_offset_contribution_kernel.get(), Window::DimY); } } // Convert QASYMM8_SIGNED->QASYMM8 - if(_flip_signedness) + if(!_fused_assembly_path && _fuse_output_stage && _flip_signedness) { - NEScheduler::get().schedule(&_convert_from_signed_asymm, Window::DimY); + NEScheduler::get().schedule(_convert_from_signed_asymm.get(), Window::DimY); } // Run fused activation unless already run in the fused assembly @@ -560,7 +581,7 @@ void NEGEMMLowpMatrixMultiplyCore::prepare() // Run reshape kernel and mark original weights tensor as unused _tmp_b.allocator()->allocate(); - NEScheduler::get().schedule(&_mtx_b_reshape_kernel, Window::DimY); + NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY); if(!original_b_managed_by_weights_manager) { _original_b->mark_as_unused(); @@ -571,7 +592,7 @@ void NEGEMMLowpMatrixMultiplyCore::prepare() if(!_fused_assembly_path && _a_offset != 0 && _reshape_b_only_on_first_run) { _vector_sum_col.allocator()->allocate(); - NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX); + NEScheduler::get().schedule(_mtx_b_reduction_kernel.get(), Window::DimX); } _is_prepared = true; diff --git a/src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp b/src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp index 239a8e668a..9fb8851d7a 100644 --- a/src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp +++ b/src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp @@ -24,15 +24,17 @@ #include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h" #include "arm_compute/core/Validate.h" 
+#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h" #include "support/MemorySupport.h" namespace arm_compute { +NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::~NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint() = default; + void NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min, int max) { @@ -46,6 +48,8 @@ Status NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(const ITens return NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::validate(input, bias, output, min, max); } +NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::~NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint() = default; + void NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min, int max) { @@ -59,6 +63,8 @@ Status NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::validate(const ITenso return NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::validate(input, bias, output, min, max); } +NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::~NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint() = default; + void NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min, int max) { auto k = arm_compute::support::cpp14::make_unique(); @@ -71,6 +77,8 @@ Status 
NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::validate(const ITens return NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::validate(input, bias, output, min, max); } +NEGEMMLowpOutputStage::~NEGEMMLowpOutputStage() = default; + void NEGEMMLowpOutputStage::configure(const ITensor *input, const ITensor *bias, ITensor *output, const GEMMLowpOutputStageInfo &info) { // Perform validate step diff --git a/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp b/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp index e807e86299..90cf0bab07 100644 --- a/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp +++ b/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp @@ -25,9 +25,9 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEGather.cpp b/src/runtime/NEON/functions/NEGather.cpp index 5238936015..5c0dae1507 100644 --- a/src/runtime/NEON/functions/NEGather.cpp +++ b/src/runtime/NEON/functions/NEGather.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEGather.h" -#include "arm_compute/core/NEON/kernels/NEGatherKernel.h" +#include "src/core/NEON/kernels/NEGatherKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEGaussian3x3.cpp b/src/runtime/NEON/functions/NEGaussian3x3.cpp index fba49ede2a..5290de1348 100644 --- a/src/runtime/NEON/functions/NEGaussian3x3.cpp +++ b/src/runtime/NEON/functions/NEGaussian3x3.cpp @@ -23,18 +23,23 @@ */ #include "arm_compute/runtime/NEON/functions/NEGaussian3x3.h" -#include "arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include 
"src/core/NEON/kernels/NEGaussian3x3Kernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; - +namespace arm_compute +{ void NEGaussian3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } +} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEGaussian5x5.cpp b/src/runtime/NEON/functions/NEGaussian5x5.cpp index 99591f4107..7857710462 100644 --- a/src/runtime/NEON/functions/NEGaussian5x5.cpp +++ b/src/runtime/NEON/functions/NEGaussian5x5.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,13 +24,17 @@ #include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEGaussian5x5::~NEGaussian5x5() = default; NEGaussian5x5::NEGaussian5x5(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _kernel_hor(), _kernel_vert(), _tmp(), _border_handler() @@ -46,21 +50,26 @@ void NEGaussian5x5::configure(ITensor *input, ITensor *output, BorderMode border // Manage intermediate buffers _memory_group.manage(&_tmp); + _kernel_hor = arm_compute::support::cpp14::make_unique(); + _kernel_vert = arm_compute::support::cpp14::make_unique(); + _border_handler = arm_compute::support::cpp14::make_unique(); + // Create and configure kernels for the two passes - _kernel_hor.configure(input, &_tmp, border_mode == BorderMode::UNDEFINED); - _kernel_vert.configure(&_tmp, output, border_mode == BorderMode::UNDEFINED); + _kernel_hor->configure(input, &_tmp, border_mode == BorderMode::UNDEFINED); + _kernel_vert->configure(&_tmp, output, border_mode == BorderMode::UNDEFINED); _tmp.allocator()->allocate(); - _border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler->configure(input, _kernel_hor->border_size(), border_mode, PixelValue(constant_border_value)); } void NEGaussian5x5::run() { - NEScheduler::get().schedule(&_border_handler, Window::DimZ); + NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); MemoryGroupResourceScope scope_mg(_memory_group); - 
NEScheduler::get().schedule(&_kernel_hor, Window::DimY); - NEScheduler::get().schedule(&_kernel_vert, Window::DimY); + NEScheduler::get().schedule(_kernel_hor.get(), Window::DimY); + NEScheduler::get().schedule(_kernel_vert.get(), Window::DimY); } +} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEGaussianPyramid.cpp b/src/runtime/NEON/functions/NEGaussianPyramid.cpp index e4e20e041b..30fe70f0ab 100644 --- a/src/runtime/NEON/functions/NEGaussianPyramid.cpp +++ b/src/runtime/NEON/functions/NEGaussianPyramid.cpp @@ -25,16 +25,18 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h" -#include "arm_compute/core/NEON/kernels/NEScaleKernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" #include "arm_compute/runtime/Pyramid.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h" +#include "src/core/NEON/kernels/NEGaussianPyramidKernel.h" +#include "src/core/NEON/kernels/NEScaleKernel.h" +#include "support/MemorySupport.h" #include @@ -45,6 +47,8 @@ NEGaussianPyramid::NEGaussianPyramid() { } +NEGaussianPyramidHalf::~NEGaussianPyramidHalf() = default; + NEGaussianPyramidHalf::NEGaussianPyramidHalf() // NOLINT : _horizontal_border_handler(), _vertical_border_handler(), @@ -94,16 +98,20 @@ void NEGaussianPyramidHalf::configure(const ITensor *input, IPyramid *pyramid, B for(size_t i = 0; i < num_stages; ++i) { /* Configure horizontal kernel */ - _horizontal_reduction[i].configure(_pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i)); + _horizontal_reduction[i] = arm_compute::support::cpp14::make_unique(); 
+ _horizontal_reduction[i]->configure(_pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i)); /* Configure vertical kernel */ - _vertical_reduction[i].configure(_tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1)); + _vertical_reduction[i] = arm_compute::support::cpp14::make_unique(); + _vertical_reduction[i]->configure(_tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1)); /* Configure border */ - _horizontal_border_handler[i].configure(_pyramid->get_pyramid_level(i), _horizontal_reduction[i].border_size(), border_mode, PixelValue(constant_border_value)); + _horizontal_border_handler[i] = arm_compute::support::cpp14::make_unique(); + _horizontal_border_handler[i]->configure(_pyramid->get_pyramid_level(i), _horizontal_reduction[i]->border_size(), border_mode, PixelValue(constant_border_value)); /* Configure border */ - _vertical_border_handler[i].configure(_tmp.get_pyramid_level(i), _vertical_reduction[i].border_size(), border_mode, PixelValue(pixel_value_u16)); + _vertical_border_handler[i] = arm_compute::support::cpp14::make_unique(); + _vertical_border_handler[i]->configure(_tmp.get_pyramid_level(i), _vertical_reduction[i]->border_size(), border_mode, PixelValue(pixel_value_u16)); } _tmp.allocate(); @@ -122,13 +130,15 @@ void NEGaussianPyramidHalf::run() for(unsigned int i = 0; i < num_levels - 1; ++i) { - NEScheduler::get().schedule(&_horizontal_border_handler[i], Window::DimZ); - NEScheduler::get().schedule(&_horizontal_reduction[i], Window::DimY); - NEScheduler::get().schedule(&_vertical_border_handler[i], Window::DimZ); - NEScheduler::get().schedule(&_vertical_reduction[i], Window::DimY); + NEScheduler::get().schedule(_horizontal_border_handler[i].get(), Window::DimZ); + NEScheduler::get().schedule(_horizontal_reduction[i].get(), Window::DimY); + NEScheduler::get().schedule(_vertical_border_handler[i].get(), Window::DimZ); + NEScheduler::get().schedule(_vertical_reduction[i].get(), Window::DimY); } } 
+NEGaussianPyramidOrb::~NEGaussianPyramidOrb() = default; + NEGaussianPyramidOrb::NEGaussianPyramidOrb() // NOLINT : _gaus5x5(), _scale_nearest() diff --git a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp index 13210a06cd..d9a498e4bd 100644 --- a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp +++ b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp @@ -25,19 +25,22 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NECopyKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEPadLayerKernel.h" #include "src/core/helpers/AutoConfiguration.h" namespace arm_compute { NEGenerateProposalsLayer::NEGenerateProposalsLayer(std::shared_ptr memory_manager) : _memory_group(memory_manager), - _permute_deltas_kernel(), + _permute_deltas(), _flatten_deltas(), - _permute_scores_kernel(), + _permute_scores(), _flatten_scores(), - _compute_anchors_kernel(), - _bounding_box_kernel(), - _pad_kernel(), + _compute_anchors(), + _bounding_box(), + _pad(), _dequantize_anchors(), _dequantize_deltas(), _quantize_all_proposals(), @@ -62,6 +65,8 @@ NEGenerateProposalsLayer::NEGenerateProposalsLayer(std::shared_ptrinit(TensorInfo(flatten_shape_deltas, 1, scores_data_type, deltas->info()->quantization_info())); @@ -95,7 +100,7 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d if(!_is_nhwc) { _memory_group.manage(&_deltas_permuted); - _permute_deltas_kernel.configure(deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 }); + _permute_deltas.configure(deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 }); _flatten_deltas.configure(&_deltas_permuted, &_deltas_flattened); _deltas_permuted.allocator()->allocate(); } @@ -112,7 +117,7 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d if(!_is_nhwc) { 
_memory_group.manage(&_scores_permuted); - _permute_scores_kernel.configure(scores, &_scores_permuted, PermutationVector{ 2, 0, 1 }); + _permute_scores.configure(scores, &_scores_permuted, PermutationVector{ 2, 0, 1 }); _flatten_scores.configure(&_scores_permuted, &_scores_flattened); _scores_permuted.allocator()->allocate(); } @@ -141,7 +146,7 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d // Bounding box transform _memory_group.manage(&_all_proposals); BoundingBoxTransformInfo bbox_info(info.im_width(), info.im_height(), 1.f); - _bounding_box_kernel.configure(anchors_to_use, &_all_proposals, deltas_to_use, bbox_info); + _bounding_box.configure(anchors_to_use, &_all_proposals, deltas_to_use, bbox_info); deltas_to_use->allocator()->allocate(); anchors_to_use->allocator()->allocate(); @@ -197,7 +202,7 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d _scores_flattened.allocator()->allocate(); // Add the first column that represents the batch id. 
This will be all zeros, as we don't support multiple images - _pad_kernel.configure(&_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } }); + _pad.configure(&_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } }); _proposals_4_roi_values.allocator()->allocate(); } @@ -229,7 +234,7 @@ Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens } TensorInfo all_anchors_info(anchors->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true)); - ARM_COMPUTE_RETURN_ON_ERROR(NEComputeAllAnchorsKernel::validate(anchors, &all_anchors_info, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale()))); + ARM_COMPUTE_RETURN_ON_ERROR(NEComputeAllAnchors::validate(anchors, &all_anchors_info, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale()))); TensorInfo deltas_permuted_info = deltas->clone()->set_tensor_shape(TensorShape(values_per_roi * num_anchors, feat_width, feat_height)).set_is_resizable(true); TensorInfo scores_permuted_info = scores->clone()->set_tensor_shape(TensorShape(num_anchors, feat_width, feat_height)).set_is_resizable(true); @@ -240,8 +245,8 @@ Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens } else { - ARM_COMPUTE_RETURN_ON_ERROR(NEPermuteKernel::validate(deltas, &deltas_permuted_info, PermutationVector{ 2, 0, 1 })); - ARM_COMPUTE_RETURN_ON_ERROR(NEPermuteKernel::validate(scores, &scores_permuted_info, PermutationVector{ 2, 0, 1 })); + ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(deltas, &deltas_permuted_info, PermutationVector{ 2, 0, 1 })); + ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(scores, &scores_permuted_info, PermutationVector{ 2, 0, 1 })); } TensorInfo deltas_flattened_info(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true)); @@ -258,25 +263,25 @@ Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens if(is_qasymm8) { TensorInfo 
all_anchors_f32_info(anchors->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true).set_data_type(DataType::F32)); - ARM_COMPUTE_RETURN_ON_ERROR(NEDequantizationLayerKernel::validate(&all_anchors_info, &all_anchors_f32_info)); + ARM_COMPUTE_RETURN_ON_ERROR(NEDequantizationLayer::validate(&all_anchors_info, &all_anchors_f32_info)); TensorInfo deltas_flattened_f32_info(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true).set_data_type(DataType::F32)); - ARM_COMPUTE_RETURN_ON_ERROR(NEDequantizationLayerKernel::validate(&deltas_flattened_info, &deltas_flattened_f32_info)); + ARM_COMPUTE_RETURN_ON_ERROR(NEDequantizationLayer::validate(&deltas_flattened_info, &deltas_flattened_f32_info)); TensorInfo proposals_4_roi_values_f32(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true).set_data_type(DataType::F32)); - ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransformKernel::validate(&all_anchors_f32_info, &proposals_4_roi_values_f32, &deltas_flattened_f32_info, - BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f))); + ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransform::validate(&all_anchors_f32_info, &proposals_4_roi_values_f32, &deltas_flattened_f32_info, + BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f))); - ARM_COMPUTE_RETURN_ON_ERROR(NEQuantizationLayerKernel::validate(&proposals_4_roi_values_f32, &proposals_4_roi_values_quantized)); + ARM_COMPUTE_RETURN_ON_ERROR(NEQuantizationLayer::validate(&proposals_4_roi_values_f32, &proposals_4_roi_values_quantized)); proposals_4_roi_values_to_use = &proposals_4_roi_values_quantized; } else { - ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransformKernel::validate(&all_anchors_info, &proposals_4_roi_values, &deltas_flattened_info, - BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f))); + 
ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransform::validate(&all_anchors_info, &proposals_4_roi_values, &deltas_flattened_info, + BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f))); } - ARM_COMPUTE_RETURN_ON_ERROR(NEPadLayerKernel::validate(proposals_4_roi_values_to_use, proposals, PaddingList{ { 1, 0 } })); + ARM_COMPUTE_RETURN_ON_ERROR(NEPadLayer::validate(proposals_4_roi_values_to_use, proposals, PaddingList{ { 1, 0 } })); if(num_valid_proposals->total_size() > 0) { @@ -319,13 +324,13 @@ void NEGenerateProposalsLayer::run() MemoryGroupResourceScope scope_mg(_memory_group); // Compute all the anchors - NEScheduler::get().schedule(&_compute_anchors_kernel, Window::DimY); + _compute_anchors.run(); // Transpose and reshape the inputs if(!_is_nhwc) { - NEScheduler::get().schedule(&_permute_deltas_kernel, Window::DimY); - NEScheduler::get().schedule(&_permute_scores_kernel, Window::DimY); + _permute_deltas.run(); + _permute_scores.run(); } _flatten_deltas.run(); @@ -333,22 +338,22 @@ void NEGenerateProposalsLayer::run() if(_is_qasymm8) { - NEScheduler::get().schedule(&_dequantize_anchors, Window::DimY); - NEScheduler::get().schedule(&_dequantize_deltas, Window::DimY); + _dequantize_anchors.run(); + _dequantize_deltas.run(); } // Build the boxes - NEScheduler::get().schedule(&_bounding_box_kernel, Window::DimY); + _bounding_box.run(); if(_is_qasymm8) { - NEScheduler::get().schedule(&_quantize_all_proposals, Window::DimY); + _quantize_all_proposals.run(); } // Non maxima suppression _cpp_nms.run(); // Add dummy batch indexes - NEScheduler::get().schedule(&_pad_kernel, Window::DimY); + _pad.run(); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEHOGDescriptor.cpp b/src/runtime/NEON/functions/NEHOGDescriptor.cpp index 10765f9b86..689e64fae7 100644 --- a/src/runtime/NEON/functions/NEHOGDescriptor.cpp +++ b/src/runtime/NEON/functions/NEHOGDescriptor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. 
+ * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,8 +28,14 @@ #include "arm_compute/core/Size2D.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEDerivativeKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEHOGDescriptorKernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEHOGDescriptor::~NEHOGDescriptor() = default; NEHOGDescriptor::NEHOGDescriptor(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _gradient(), _orient_bin(), _block_norm(), _mag(), _phase(), _hog_space() @@ -82,10 +88,12 @@ void NEHOGDescriptor::configure(ITensor *input, ITensor *output, const IHOG *hog _memory_group.manage(&_hog_space); // Initialise orientation binning kernel - _orient_bin.configure(&_mag, &_phase, &_hog_space, hog->info()); + _orient_bin = arm_compute::support::cpp14::make_unique(); + _orient_bin->configure(&_mag, &_phase, &_hog_space, hog->info()); // Initialize HOG norm kernel - _block_norm.configure(&_hog_space, output, hog->info()); + _block_norm = arm_compute::support::cpp14::make_unique(); + _block_norm->configure(&_hog_space, output, hog->info()); // Allocate intermediate tensors _mag.allocator()->allocate(); @@ -101,8 +109,9 @@ void NEHOGDescriptor::run() _gradient.run(); // Run orientation binning kernel - NEScheduler::get().schedule(&_orient_bin, Window::DimY); + NEScheduler::get().schedule(_orient_bin.get(), Window::DimY); // Run block normalization kernel - NEScheduler::get().schedule(&_block_norm, Window::DimY); + NEScheduler::get().schedule(_block_norm.get(), Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEHOGDetector.cpp b/src/runtime/NEON/functions/NEHOGDetector.cpp index 21db5f83b7..8468b75f4e 100644 --- a/src/runtime/NEON/functions/NEHOGDetector.cpp +++ 
b/src/runtime/NEON/functions/NEHOGDetector.cpp @@ -23,10 +23,12 @@ */ #include "arm_compute/runtime/NEON/functions/NEHOGDetector.h" -#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h" +#include "src/core/NEON/kernels/NEHOGDetectorKernel.h" #include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEHOGDetector::~NEHOGDetector() = default; void NEHOGDetector::configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold, size_t idx_class) { @@ -34,3 +36,4 @@ void NEHOGDetector::configure(const ITensor *input, const IHOG *hog, IDetectionW k->configure(input, hog, detection_windows, detection_window_stride, threshold, idx_class); _kernel = std::move(k); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEHOGGradient.cpp b/src/runtime/NEON/functions/NEHOGGradient.cpp index 8f3559a7ed..7d794bc1a0 100644 --- a/src/runtime/NEON/functions/NEHOGGradient.cpp +++ b/src/runtime/NEON/functions/NEHOGGradient.cpp @@ -23,12 +23,16 @@ */ #include "arm_compute/runtime/NEON/functions/NEHOGGradient.h" -#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEDerivativeKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h" #include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEHOGGradient::~NEHOGGradient() = default; NEHOGGradient::NEHOGGradient(std::shared_ptr memory_manager) // NOLINT : _memory_group(std::move(memory_manager)), @@ -88,3 +92,4 @@ void NEHOGGradient::run() // Run magnitude/phase kernel NEScheduler::get().schedule(_mag_phase.get(), Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEHOGMultiDetection.cpp 
b/src/runtime/NEON/functions/NEHOGMultiDetection.cpp index e08b699e1c..3e41faad43 100644 --- a/src/runtime/NEON/functions/NEHOGMultiDetection.cpp +++ b/src/runtime/NEON/functions/NEHOGMultiDetection.cpp @@ -28,8 +28,13 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/Tensor.h" +#include "src/core/NEON/kernels/NEDerivativeKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEHOGDescriptorKernel.h" -using namespace arm_compute; +namespace arm_compute +{ +NEHOGMultiDetection::~NEHOGMultiDetection() = default; NEHOGMultiDetection::NEHOGMultiDetection(std::shared_ptr memory_manager) // NOLINT : _memory_group(std::move(memory_manager)), @@ -262,3 +267,4 @@ void NEHOGMultiDetection::run() NEScheduler::get().schedule(&_non_maxima_kernel, Window::DimY); } } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEHarrisCorners.cpp b/src/runtime/NEON/functions/NEHarrisCorners.cpp index 3c51eb2249..23fcf8c805 100644 --- a/src/runtime/NEON/functions/NEHarrisCorners.cpp +++ b/src/runtime/NEON/functions/NEHarrisCorners.cpp @@ -24,8 +24,6 @@ #include "arm_compute/runtime/NEON/functions/NEHarrisCorners.h" #include "arm_compute/core/Error.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/Array.h" @@ -34,12 +32,19 @@ #include "arm_compute/runtime/NEON/functions/NESobel5x5.h" #include "arm_compute/runtime/NEON/functions/NESobel7x7.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEHarrisCornersKernel.h" +#include "src/core/NEON/kernels/NESobel5x5Kernel.h" +#include "src/core/NEON/kernels/NESobel7x7Kernel.h" #include 
"support/MemorySupport.h" #include #include -using namespace arm_compute; +namespace arm_compute +{ +NEHarrisCorners::~NEHarrisCorners() = default; NEHarrisCorners::NEHarrisCorners(std::shared_ptr memory_manager) // NOLINT : _memory_group(std::move(memory_manager)), @@ -154,8 +159,10 @@ void NEHarrisCorners::configure(IImage *input, float threshold, float min_dist, } // Configure border filling before harris score - _border_gx.configure(&_gx, _harris_score->border_size(), border_mode, constant_border_value); - _border_gy.configure(&_gy, _harris_score->border_size(), border_mode, constant_border_value); + _border_gx = arm_compute::support::cpp14::make_unique(); + _border_gy = arm_compute::support::cpp14::make_unique(); + _border_gx->configure(&_gx, _harris_score->border_size(), border_mode, constant_border_value); + _border_gy->configure(&_gy, _harris_score->border_size(), border_mode, constant_border_value); // Allocate once all the configure methods have been called _gx.allocator()->allocate(); @@ -193,8 +200,8 @@ void NEHarrisCorners::run() _sobel->run(); // Fill border before harris score kernel - NEScheduler::get().schedule(&_border_gx, Window::DimZ); - NEScheduler::get().schedule(&_border_gy, Window::DimZ); + NEScheduler::get().schedule(_border_gx.get(), Window::DimZ); + NEScheduler::get().schedule(_border_gy.get(), Window::DimZ); // Run harris score kernel NEScheduler::get().schedule(_harris_score.get(), Window::DimY); @@ -208,3 +215,4 @@ void NEHarrisCorners::run() // Run sort & euclidean distance NEScheduler::get().schedule(&_sort_euclidean, Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEHistogram.cpp b/src/runtime/NEON/functions/NEHistogram.cpp index 39fad977af..40ea3a16c6 100644 --- a/src/runtime/NEON/functions/NEHistogram.cpp +++ b/src/runtime/NEON/functions/NEHistogram.cpp @@ -29,8 +29,12 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include 
"arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEHistogramKernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEHistogram::~NEHistogram() = default; NEHistogram::NEHistogram() : _histogram_kernel(), _local_hist(), _window_lut(window_lut_default_size), _local_hist_size(0) @@ -47,11 +51,13 @@ void NEHistogram::configure(const IImage *input, IDistribution1D *output) _local_hist.resize(_local_hist_size); // Configure kernel - _histogram_kernel.configure(input, output, _local_hist.data(), _window_lut.data()); + _histogram_kernel = arm_compute::support::cpp14::make_unique(); + _histogram_kernel->configure(input, output, _local_hist.data(), _window_lut.data()); } void NEHistogram::run() { // Calculate histogram of input. - NEScheduler::get().schedule(&_histogram_kernel, Window::DimY); + NEScheduler::get().schedule(_histogram_kernel.get(), Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEIm2Col.cpp b/src/runtime/NEON/functions/NEIm2Col.cpp index 99e5d3f1df..bc0c60112e 100644 --- a/src/runtime/NEON/functions/NEIm2Col.cpp +++ b/src/runtime/NEON/functions/NEIm2Col.cpp @@ -25,9 +25,13 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEIm2ColKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NEIm2Col::~NEIm2Col() = default; + NEIm2Col::NEIm2Col() : _kernel(), _y_dim(1) { @@ -37,7 +41,8 @@ void NEIm2Col::configure(const ITensor *input, ITensor *output, const Size2D &ke { _y_dim = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT); - _kernel.configure(input, output, kernel_dims, conv_info, has_bias, dilation, num_groups); + _kernel = arm_compute::support::cpp14::make_unique(); + _kernel->configure(input, output, kernel_dims, conv_info, has_bias, dilation, num_groups); } Status NEIm2Col::validate(const ITensorInfo *input, const 
ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, @@ -48,6 +53,6 @@ Status NEIm2Col::validate(const ITensorInfo *input, const ITensorInfo *output, c void NEIm2Col::run() { - NEScheduler::get().schedule(&_kernel, _y_dim); + NEScheduler::get().schedule(_kernel.get(), _y_dim); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEInstanceNormalizationLayer.cpp b/src/runtime/NEON/functions/NEInstanceNormalizationLayer.cpp index 57d01ff2d6..e3fb284796 100644 --- a/src/runtime/NEON/functions/NEInstanceNormalizationLayer.cpp +++ b/src/runtime/NEON/functions/NEInstanceNormalizationLayer.cpp @@ -26,9 +26,13 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NEInstanceNormalizationLayer::~NEInstanceNormalizationLayer() = default; + NEInstanceNormalizationLayer::NEInstanceNormalizationLayer(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _normalization_kernel(), _is_nchw(false), _permute_input(), _permute_output(), _permuted_input(), _permuted_output() { @@ -42,6 +46,8 @@ void NEInstanceNormalizationLayer::configure(ITensor *input, ITensor *output, fl // Configure Kernels _is_nchw = data_layout == DataLayout::NCHW; + _normalization_kernel = arm_compute::support::cpp14::make_unique(); + if(!_is_nchw) { _memory_group.manage(&_permuted_input); @@ -51,7 +57,7 @@ void NEInstanceNormalizationLayer::configure(ITensor *input, ITensor *output, fl _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U)); _permuted_input.info()->set_data_layout(DataLayout::NCHW); - _normalization_kernel.configure(&_permuted_input, &_permuted_output, kernel_descriptor); + _normalization_kernel->configure(&_permuted_input, &_permuted_output, 
kernel_descriptor); _permuted_output.info()->set_data_layout(DataLayout::NCHW); _permute_output.configure(&_permuted_output, output != nullptr ? output : input, PermutationVector(2U, 0U, 1U)); @@ -60,7 +66,7 @@ void NEInstanceNormalizationLayer::configure(ITensor *input, ITensor *output, fl } else { - _normalization_kernel.configure(input, output, kernel_descriptor); + _normalization_kernel->configure(input, output, kernel_descriptor); } } @@ -81,7 +87,7 @@ void NEInstanceNormalizationLayer::run() _permute_input.run(); } - NEScheduler::get().schedule(&_normalization_kernel, Window::DimZ); + NEScheduler::get().schedule(_normalization_kernel.get(), Window::DimZ); // Permute output if(!_is_nchw) diff --git a/src/runtime/NEON/functions/NEIntegralImage.cpp b/src/runtime/NEON/functions/NEIntegralImage.cpp index 8ab6bbd76d..63bcd53373 100644 --- a/src/runtime/NEON/functions/NEIntegralImage.cpp +++ b/src/runtime/NEON/functions/NEIntegralImage.cpp @@ -23,18 +23,25 @@ */ #include "arm_compute/runtime/NEON/functions/NEIntegralImage.h" -#include "arm_compute/core/NEON/kernels/NEIntegralImageKernel.h" #include "arm_compute/core/Types.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEIntegralImageKernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; +namespace arm_compute +{ +NEIntegralImage::~NEIntegralImage() = default; void NEIntegralImage::configure(const ITensor *input, ITensor *output) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output); _kernel = std::move(k); - _border_handler.configure(output, _kernel->border_size(), BorderMode::CONSTANT, PixelValue()); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(output, _kernel->border_size(), BorderMode::CONSTANT, PixelValue()); + _border_handler = std::move(b); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp b/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp 
index 04cf3a233a..4a99968cc3 100644 --- a/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp +++ b/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,6 +25,9 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEL2NormalizeLayerKernel.h" +#include "src/core/NEON/kernels/NEReductionOperationKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { @@ -32,6 +35,7 @@ namespace { constexpr int max_input_tensor_dim = 3; } // namespace +NEL2NormalizeLayer::~NEL2NormalizeLayer() = default; NEL2NormalizeLayer::NEL2NormalizeLayer(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _reduce_func(), _normalize_kernel(), _sumsq() @@ -46,7 +50,8 @@ void NEL2NormalizeLayer::configure(ITensor *input, ITensor *output, int axis, fl // Configure Kernels const uint32_t actual_axis = wrap_around(axis, max_input_tensor_dim); _reduce_func.configure(input, &_sumsq, actual_axis, ReductionOperation::SUM_SQUARE); - _normalize_kernel.configure(input, &_sumsq, output, axis, epsilon); + _normalize_kernel = arm_compute::support::cpp14::make_unique(); + _normalize_kernel->configure(input, &_sumsq, output, axis, epsilon); // Allocate intermediate tensors _sumsq.allocator()->allocate(); @@ -78,6 +83,6 @@ void NEL2NormalizeLayer::run() MemoryGroupResourceScope scope_mg(_memory_group); _reduce_func.run(); - NEScheduler::get().schedule(&_normalize_kernel, Window::DimY); + NEScheduler::get().schedule(_normalize_kernel.get(), Window::DimY); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NELSTMLayer.cpp b/src/runtime/NEON/functions/NELSTMLayer.cpp index dca274acd2..48d69bd6fc 100644 --- a/src/runtime/NEON/functions/NELSTMLayer.cpp +++ b/src/runtime/NEON/functions/NELSTMLayer.cpp @@ -29,12 +29,24 @@ #include 
"arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/common/LSTMParams.h" +#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" +#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" namespace arm_compute { using namespace arm_compute::misc::shape_calculator; using namespace arm_compute::utils::info_helpers; +NELSTMLayer::~NELSTMLayer() = default; + NELSTMLayer::NELSTMLayer(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _fully_connected_input_gate(), _accum_input_gate1(), _subtract_input_gate(), _pixelwise_mul_input_gate(), _activation_input_gate(), _fully_connected_forget_gate(), _accum_forget_gate1(), _pixelwise_mul_forget_gate(), _activation_forget_gate(), _fully_connected_cell_state(), _gemm_cell_state1(), _transpose_cell_state(), @@ -575,8 +587,8 @@ Status NELSTMLayer::validate(const ITensorInfo *input, } // Validate copy kernel - ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(&cell_state_tmp, cell_state_out)); - ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(output_state_out, output)); + ARM_COMPUTE_RETURN_ON_ERROR(NECopy::validate(&cell_state_tmp, cell_state_out)); + ARM_COMPUTE_RETURN_ON_ERROR(NECopy::validate(output_state_out, output)); // Validate scratch concatenation std::vector inputs_vector_info_raw; @@ -646,7 +658,7 @@ void NELSTMLayer::run() } 
_fully_connected_cell_state.run(); - NEScheduler::get().schedule(&_transpose_cell_state, Window::DimY); + _transpose_cell_state.run(); _gemm_cell_state1.run(); _accum_cell_state1.run(); if(_is_layer_norm_lstm) @@ -691,8 +703,8 @@ void NELSTMLayer::run() } } - NEScheduler::get().schedule(&_copy_cell_state, Window::DimY); - NEScheduler::get().schedule(&_copy_output, Window::DimY); + _copy_cell_state.run(); + _copy_output.run(); _concat_scratch_buffer.run(); } diff --git a/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp b/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp index 7610d15787..e43929390e 100644 --- a/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp +++ b/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp @@ -26,6 +26,16 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" +#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include @@ -42,6 +52,7 @@ const QuantizationInfo qsymm_3(8.f / 32768.f, 0); // qsymm16 with 3 integer bit const QuantizationInfo qsymm_4(16.f / 32768.f, 0); // qsymm16 with 4 integer bit const QuantizationInfo qsymm_0(1.f / 32768.f, 0); // qsymm16 with 0 integer bit } // namespace +NELSTMLayerQuantized::~NELSTMLayerQuantized() = default; 
NELSTMLayerQuantized::NELSTMLayerQuantized(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _gemmlowp(), _output_stage(), _transpose_weights(), _concat_input_weights(), _concat_recurrent_weights(), _concat_weights(), _concat_inputs(), diff --git a/src/runtime/NEON/functions/NELaplacianPyramid.cpp b/src/runtime/NEON/functions/NELaplacianPyramid.cpp index 4f0639b64b..a2651dbf36 100644 --- a/src/runtime/NEON/functions/NELaplacianPyramid.cpp +++ b/src/runtime/NEON/functions/NELaplacianPyramid.cpp @@ -29,11 +29,15 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" #include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h" -#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" #include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h" #include "arm_compute/runtime/Tensor.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h" +#include "src/core/NEON/kernels/NEGaussianPyramidKernel.h" -using namespace arm_compute; +namespace arm_compute +{ +NELaplacianPyramid::~NELaplacianPyramid() = default; NELaplacianPyramid::NELaplacianPyramid() // NOLINT : _num_levels(0), @@ -105,3 +109,4 @@ void NELaplacianPyramid::configure(const ITensor *input, IPyramid *pyramid, ITen _gauss_pyr.allocate(); _conv_pyr.allocate(); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NELaplacianReconstruct.cpp b/src/runtime/NEON/functions/NELaplacianReconstruct.cpp index aa5f8a21ca..a50e7ccbef 100644 --- a/src/runtime/NEON/functions/NELaplacianReconstruct.cpp +++ b/src/runtime/NEON/functions/NELaplacianReconstruct.cpp @@ -23,6 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h" +#include "arm_compute/core/CPP/ICPPKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/IPyramid.h" #include "arm_compute/core/ITensor.h" @@ -31,7 +32,9 @@ #include -using namespace 
arm_compute; +namespace arm_compute +{ +NELaplacianReconstruct::~NELaplacianReconstruct() = default; NELaplacianReconstruct::NELaplacianReconstruct() // NOLINT : _tmp_pyr(), @@ -100,3 +103,4 @@ void NELaplacianReconstruct::run() _depthf.run(); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp index af502be1e9..131ac82ba8 100644 --- a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,12 +27,16 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEIm2ColKernel.h" +#include "src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h" +#include "support/MemorySupport.h" #include #include -using namespace arm_compute; - +namespace arm_compute +{ namespace { void calculate_shapes(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, @@ -70,9 +74,10 @@ void calculate_shapes(const ITensorInfo *input, const ITensorInfo *weights, cons shape_gemm.set(1, mat_input_rows); } } // namespace +NELocallyConnectedLayer::~NELocallyConnectedLayer() = default; NELocallyConnectedLayer::NELocallyConnectedLayer(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _input_im2col_kernel(), _weights_reshape_kernel(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), _weights_reshaped(), _gemm_output(), + : _memory_group(std::move(memory_manager)), _input_im2col(), _weights_reshape_kernel(), _mm_kernel(), _output_col2im(), _input_im2col_reshaped(), _weights_reshaped(), _gemm_output(), 
_is_prepared(false), _original_weights(nullptr) { } @@ -113,10 +118,10 @@ Status NELocallyConnectedLayer::validate(const ITensorInfo *input, const ITensor TensorInfo input_im2col_reshaped_info(shape_im2col, 1, input->data_type()); TensorInfo gemm_output_info(shape_gemm, 1, input->data_type()); - ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &input_im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, has_bias)); + ARM_COMPUTE_RETURN_ON_ERROR(NEIm2Col::validate(input, &input_im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, has_bias)); ARM_COMPUTE_RETURN_ON_ERROR(NEWeightsReshapeKernel::validate(weights, biases, &weights_reshaped_info)); ARM_COMPUTE_RETURN_ON_ERROR(NELocallyConnectedMatrixMultiplyKernel::validate(&input_im2col_reshaped_info, &weights_reshaped_info, &gemm_output_info)); - ARM_COMPUTE_RETURN_ON_ERROR(NECol2ImKernel::validate(&gemm_output_info, output, Size2D(conv_w, conv_h))); + ARM_COMPUTE_RETURN_ON_ERROR(NECol2Im::validate(&gemm_output_info, output, Size2D(conv_w, conv_h))); return Status{}; } @@ -154,10 +159,12 @@ void NELocallyConnectedLayer::configure(const ITensor *input, const ITensor *wei _memory_group.manage(&_gemm_output); // Configure kernels - _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias); - _weights_reshape_kernel.configure(weights, biases, &_weights_reshaped); - _mm_kernel.configure(&_input_im2col_reshaped, &_weights_reshaped, &_gemm_output); - _output_col2im_kernel.configure(&_gemm_output, output, Size2D(conv_w, conv_h)); + _input_im2col.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias); + _weights_reshape_kernel = arm_compute::support::cpp14::make_unique(); + _weights_reshape_kernel->configure(weights, biases, &_weights_reshaped); + _mm_kernel = arm_compute::support::cpp14::make_unique(); + _mm_kernel->configure(&_input_im2col_reshaped, &_weights_reshaped, 
&_gemm_output); + _output_col2im.configure(&_gemm_output, output, Size2D(conv_w, conv_h)); // Allocate intermediate tensors _input_im2col_reshaped.allocator()->allocate(); @@ -171,13 +178,13 @@ void NELocallyConnectedLayer::run() MemoryGroupResourceScope scope_mg(_memory_group); // Run input reshaping - NEScheduler::get().schedule(&_input_im2col_kernel, Window::DimY); + _input_im2col.run(); // Runs GEMM on reshaped matrices - NEScheduler::get().schedule(&_mm_kernel, Window::DimX); + NEScheduler::get().schedule(_mm_kernel.get(), Window::DimX); // Reshape output matrix - NEScheduler::get().schedule(&_output_col2im_kernel, Window::DimY); + _output_col2im.run(); } void NELocallyConnectedLayer::prepare() @@ -188,9 +195,10 @@ void NELocallyConnectedLayer::prepare() // Run weights reshaping and mark original weights tensor as unused _weights_reshaped.allocator()->allocate(); - NEScheduler::get().schedule(&_weights_reshape_kernel, 3); + NEScheduler::get().schedule(_weights_reshape_kernel.get(), 3); _original_weights->mark_as_unused(); _is_prepared = true; } } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEMagnitude.cpp b/src/runtime/NEON/functions/NEMagnitude.cpp index 5ca672e1d6..06ed8d46c9 100644 --- a/src/runtime/NEON/functions/NEMagnitude.cpp +++ b/src/runtime/NEON/functions/NEMagnitude.cpp @@ -23,13 +23,15 @@ */ #include "arm_compute/runtime/NEON/functions/NEMagnitude.h" -#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h" #include "arm_compute/core/Types.h" +#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; +namespace arm_compute +{ +NEMagnitude::~NEMagnitude() = default; void NEMagnitude::configure(const ITensor *input1, const ITensor *input2, ITensor *output, MagnitudeType mag_type) { @@ -46,3 +48,4 @@ void NEMagnitude::configure(const ITensor *input1, const ITensor *input2, ITenso _kernel = std::move(k); } } +} // namespace arm_compute diff --git 
a/src/runtime/NEON/functions/NEMaxUnpoolingLayer.cpp b/src/runtime/NEON/functions/NEMaxUnpoolingLayer.cpp index 9d3f34fba4..e8c9d09d95 100644 --- a/src/runtime/NEON/functions/NEMaxUnpoolingLayer.cpp +++ b/src/runtime/NEON/functions/NEMaxUnpoolingLayer.cpp @@ -25,9 +25,14 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h" +#include "src/core/NEON/kernels/NEMemsetKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NEMaxUnpoolingLayer::~NEMaxUnpoolingLayer() = default; + NEMaxUnpoolingLayer::NEMaxUnpoolingLayer() : _memset_kernel(), _unpooling_layer_kernel() @@ -37,8 +42,10 @@ NEMaxUnpoolingLayer::NEMaxUnpoolingLayer() void NEMaxUnpoolingLayer::configure(ITensor *input, ITensor *indices, ITensor *output, const PoolingLayerInfo &pool_info) { const PixelValue zero_value(0.f); - _memset_kernel.configure(output, zero_value); - _unpooling_layer_kernel.configure(input, indices, output, pool_info); + _memset_kernel = arm_compute::support::cpp14::make_unique(); + _unpooling_layer_kernel = arm_compute::support::cpp14::make_unique(); + _memset_kernel->configure(output, zero_value); + _unpooling_layer_kernel->configure(input, indices, output, pool_info); } Status NEMaxUnpoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info) @@ -48,7 +55,7 @@ Status NEMaxUnpoolingLayer::validate(const ITensorInfo *input, const ITensorInfo void NEMaxUnpoolingLayer::run() { - NEScheduler::get().schedule(&_memset_kernel, Window::DimY); - NEScheduler::get().schedule(&_unpooling_layer_kernel, Window::DimY); + NEScheduler::get().schedule(_memset_kernel.get(), Window::DimY); + NEScheduler::get().schedule(_unpooling_layer_kernel.get(), Window::DimY); } } /* namespace arm_compute */ diff --git a/src/runtime/NEON/functions/NEMeanStdDev.cpp b/src/runtime/NEON/functions/NEMeanStdDev.cpp index 
57363f05ff..e073420114 100644 --- a/src/runtime/NEON/functions/NEMeanStdDev.cpp +++ b/src/runtime/NEON/functions/NEMeanStdDev.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,8 +24,13 @@ #include "arm_compute/runtime/NEON/functions/NEMeanStdDev.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEMeanStdDevKernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEMeanStdDev::~NEMeanStdDev() = default; NEMeanStdDev::NEMeanStdDev() : _mean_stddev_kernel(), _fill_border_kernel(), _global_sum(0), _global_sum_squared(0) @@ -34,8 +39,11 @@ NEMeanStdDev::NEMeanStdDev() void NEMeanStdDev::configure(IImage *input, float *mean, float *stddev) { - _mean_stddev_kernel.configure(input, mean, &_global_sum, stddev, &_global_sum_squared); - _fill_border_kernel.configure(input, _mean_stddev_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast(0))); + _mean_stddev_kernel = arm_compute::support::cpp14::make_unique(); + _fill_border_kernel = arm_compute::support::cpp14::make_unique(); + + _mean_stddev_kernel->configure(input, mean, &_global_sum, stddev, &_global_sum_squared); + _fill_border_kernel->configure(input, _mean_stddev_kernel->border_size(), BorderMode::CONSTANT, PixelValue(static_cast(0))); } void NEMeanStdDev::run() @@ -43,6 +51,7 @@ void NEMeanStdDev::run() _global_sum = 0; _global_sum_squared = 0; - NEScheduler::get().schedule(&_fill_border_kernel, Window::DimZ); - NEScheduler::get().schedule(&_mean_stddev_kernel, Window::DimY); + NEScheduler::get().schedule(_fill_border_kernel.get(), Window::DimZ); + NEScheduler::get().schedule(_mean_stddev_kernel.get(), Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.cpp 
b/src/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.cpp index a88732b67d..d128c4456a 100644 --- a/src/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.cpp +++ b/src/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.cpp @@ -23,11 +23,13 @@ */ #include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h" -#include "arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h" +#include "src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h" #include "support/MemorySupport.h" namespace arm_compute { +NEMeanStdDevNormalizationLayer::~NEMeanStdDevNormalizationLayer() = default; + void NEMeanStdDevNormalizationLayer::configure(ITensor *input, ITensor *output, float epsilon) { auto k = arm_compute::support::cpp14::make_unique(); diff --git a/src/runtime/NEON/functions/NEMedian3x3.cpp b/src/runtime/NEON/functions/NEMedian3x3.cpp index 2bbe8d39ae..b7b7c2cb47 100644 --- a/src/runtime/NEON/functions/NEMedian3x3.cpp +++ b/src/runtime/NEON/functions/NEMedian3x3.cpp @@ -23,18 +23,23 @@ */ #include "arm_compute/runtime/NEON/functions/NEMedian3x3.h" -#include "arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEMedian3x3Kernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; - +namespace arm_compute +{ void NEMedian3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } +} // namespace arm_compute \ No 
newline at end of file diff --git a/src/runtime/NEON/functions/NEMinMaxLocation.cpp b/src/runtime/NEON/functions/NEMinMaxLocation.cpp index ca63937770..3c2219ca07 100644 --- a/src/runtime/NEON/functions/NEMinMaxLocation.cpp +++ b/src/runtime/NEON/functions/NEMinMaxLocation.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,8 +24,12 @@ #include "arm_compute/runtime/NEON/functions/NEMinMaxLocation.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEMinMaxLocationKernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEMinMaxLocation::~NEMinMaxLocation() = default; NEMinMaxLocation::NEMinMaxLocation() : _min_max(), _min_max_loc() @@ -34,17 +38,21 @@ NEMinMaxLocation::NEMinMaxLocation() void NEMinMaxLocation::configure(const IImage *input, void *min, void *max, ICoordinates2DArray *min_loc, ICoordinates2DArray *max_loc, uint32_t *min_count, uint32_t *max_count) { - _min_max.configure(input, min, max); - _min_max_loc.configure(input, min, max, min_loc, max_loc, min_count, max_count); + _min_max = arm_compute::support::cpp14::make_unique(); + _min_max->configure(input, min, max); + + _min_max_loc = arm_compute::support::cpp14::make_unique(); + _min_max_loc->configure(input, min, max, min_loc, max_loc, min_count, max_count); } void NEMinMaxLocation::run() { - _min_max.reset(); + _min_max->reset(); /* Run min max kernel */ - NEScheduler::get().schedule(&_min_max, Window::DimY); + NEScheduler::get().schedule(_min_max.get(), Window::DimY); /* Run min max location */ - NEScheduler::get().schedule(&_min_max_loc, Window::DimY); + NEScheduler::get().schedule(_min_max_loc.get(), Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NENonLinearFilter.cpp b/src/runtime/NEON/functions/NENonLinearFilter.cpp index b7c72acb9a..4d8fd00cbd 100644 --- 
a/src/runtime/NEON/functions/NENonLinearFilter.cpp +++ b/src/runtime/NEON/functions/NENonLinearFilter.cpp @@ -23,14 +23,15 @@ */ #include "arm_compute/runtime/NEON/functions/NENonLinearFilter.h" -#include "arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NENonLinearFilterKernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; - +namespace arm_compute +{ void NENonLinearFilter::configure(ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode, uint8_t constant_border_value) @@ -38,5 +39,9 @@ void NENonLinearFilter::configure(ITensor *input, ITensor *output, NonLinearFilt auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output, function, mask_size, pattern, mask, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp b/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp index 4d9edf7fc7..b8f5c251b7 100644 --- a/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp +++ b/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp @@ -23,25 +23,29 @@ */ #include "arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h" -#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" #include "support/MemorySupport.h" #include -using namespace 
arm_compute; - +namespace arm_compute +{ void NENonMaximaSuppression3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); + auto b = arm_compute::support::cpp14::make_unique(); if(border_mode != BorderMode::UNDEFINED) { - _border_handler.configure(input, BorderSize(1), BorderMode::CONSTANT, static_cast(0.f)); + b->configure(input, BorderSize(1), BorderMode::CONSTANT, static_cast(0.f)); } else { - _border_handler.configure(input, BorderSize(1), BorderMode::UNDEFINED, static_cast(0.f)); + b->configure(input, BorderSize(1), BorderMode::UNDEFINED, static_cast(0.f)); } + _border_handler = std::move(b); } +} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NENormalizationLayer.cpp b/src/runtime/NEON/functions/NENormalizationLayer.cpp index 10ee938335..dfc73b2a57 100644 --- a/src/runtime/NEON/functions/NENormalizationLayer.cpp +++ b/src/runtime/NEON/functions/NENormalizationLayer.cpp @@ -29,9 +29,13 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NENormalizationLayerKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NENormalizationLayer::~NENormalizationLayer() = default; + NENormalizationLayer::NENormalizationLayer(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _norm_kernel(), _multiply_f(), _input_squared() { @@ -48,7 +52,8 @@ void NENormalizationLayer::configure(const ITensor *input, ITensor *output, cons _memory_group.manage(&_input_squared); // Configure kernels - _norm_kernel.configure(input, &_input_squared, output, norm_info); + _norm_kernel = arm_compute::support::cpp14::make_unique(); + _norm_kernel->configure(input, &_input_squared, output, norm_info); _multiply_f.configure(input, input, 
&_input_squared, 1.0f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO); // Allocate the tensor once the configure methods have been called @@ -70,6 +75,6 @@ void NENormalizationLayer::run() { MemoryGroupResourceScope scope_mg(_memory_group); _multiply_f.run(); - NEScheduler::get().schedule(&_norm_kernel, Window::DimY); + NEScheduler::get().schedule(_norm_kernel.get(), Window::DimY); } } \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEOpticalFlow.cpp b/src/runtime/NEON/functions/NEOpticalFlow.cpp index c9e07483e6..565346bfce 100644 --- a/src/runtime/NEON/functions/NEOpticalFlow.cpp +++ b/src/runtime/NEON/functions/NEOpticalFlow.cpp @@ -25,7 +25,6 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Window.h" #include "arm_compute/runtime/NEON/NEScheduler.h" @@ -33,8 +32,13 @@ #include "arm_compute/runtime/Pyramid.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NELKTrackerKernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEOpticalFlow::~NEOpticalFlow() = default; NEOpticalFlow::NEOpticalFlow(std::shared_ptr memory_manager) // NOLINT : _memory_group(std::move(memory_manager)), @@ -110,11 +114,12 @@ void NEOpticalFlow::configure(const Pyramid *old_pyramid, const Pyramid *new_pyr _func_scharr[i].configure(old_ith_input, &_scharr_gx[i], &_scharr_gy[i], border_mode, constant_border_value); // Init Lucas-Kanade kernel - _kernel_tracker[i].configure(old_ith_input, new_ith_input, &_scharr_gx[i], &_scharr_gy[i], - old_points, new_points_estimates, new_points, - &_old_points_internal, &_new_points_internal, - termination, use_initial_estimate, epsilon, num_iterations, window_dimension, - i, _num_levels, pyr_scale); + 
_kernel_tracker[i] = arm_compute::support::cpp14::make_unique(); + _kernel_tracker[i]->configure(old_ith_input, new_ith_input, &_scharr_gx[i], &_scharr_gy[i], + old_points, new_points_estimates, new_points, + &_old_points_internal, &_new_points_internal, + termination, use_initial_estimate, epsilon, num_iterations, window_dimension, + i, _num_levels, pyr_scale); _scharr_gx[i].allocator()->allocate(); _scharr_gy[i].allocator()->allocate(); @@ -133,6 +138,7 @@ void NEOpticalFlow::run() _func_scharr[level - 1].run(); // Run Lucas-Kanade kernel - NEScheduler::get().schedule(&_kernel_tracker[level - 1], Window::DimX); + NEScheduler::get().schedule(_kernel_tracker[level - 1].get(), Window::DimX); } } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEPReluLayer.cpp b/src/runtime/NEON/functions/NEPReluLayer.cpp index f9393a4d92..00a1a4257a 100644 --- a/src/runtime/NEON/functions/NEPReluLayer.cpp +++ b/src/runtime/NEON/functions/NEPReluLayer.cpp @@ -24,7 +24,7 @@ #include "arm_compute/runtime/NEON/functions/NEPReluLayer.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h" +#include "src/core/NEON/kernels/NEElementwiseOperationKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEPadLayer.cpp b/src/runtime/NEON/functions/NEPadLayer.cpp index 03c597a3bf..92659f39a2 100644 --- a/src/runtime/NEON/functions/NEPadLayer.cpp +++ b/src/runtime/NEON/functions/NEPadLayer.cpp @@ -27,7 +27,10 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/NEON/kernels/NECopyKernel.h" +#include "src/core/NEON/kernels/NEPadLayerKernel.h" #include "src/core/helpers/AutoConfiguration.h" +#include "support/MemorySupport.h" namespace arm_compute { @@ -47,6 +50,8 @@ uint32_t last_padding_dimension(const PaddingList &padding) } } // namespace +NEPadLayer::~NEPadLayer() = default; + NEPadLayer::NEPadLayer() : 
_copy_kernel(), _pad_kernel(), _mode(), _padding(), _num_dimensions(0), _slice_functions(), _concat_functions(), _slice_results(), _concat_results() { @@ -54,7 +59,8 @@ NEPadLayer::NEPadLayer() void NEPadLayer::configure_constant_mode(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value) { - _pad_kernel.configure(input, output, padding, constant_value, PaddingMode::CONSTANT); + _pad_kernel = arm_compute::support::cpp14::make_unique(); + _pad_kernel->configure(input, output, padding, constant_value, PaddingMode::CONSTANT); } void NEPadLayer::configure_reflect_symmetric_mode(ITensor *input, ITensor *output) @@ -195,7 +201,8 @@ void NEPadLayer::configure(ITensor *input, ITensor *output, const PaddingList &p else { // Copy the input to the whole output if no padding is applied - _copy_kernel.configure(input, output); + _copy_kernel = arm_compute::support::cpp14::make_unique(); + _copy_kernel->configure(input, output); } } @@ -251,7 +258,7 @@ void NEPadLayer::run() { case PaddingMode::CONSTANT: { - NEScheduler::get().schedule(&_pad_kernel, Window::DimZ); + NEScheduler::get().schedule(_pad_kernel.get(), Window::DimZ); break; } case PaddingMode::REFLECT: @@ -280,7 +287,7 @@ void NEPadLayer::run() } else { - NEScheduler::get().schedule(&_copy_kernel, Window::DimY); + NEScheduler::get().schedule(_copy_kernel.get(), Window::DimY); } } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEPermute.cpp b/src/runtime/NEON/functions/NEPermute.cpp index 698add86b9..d2a115fdc8 100644 --- a/src/runtime/NEON/functions/NEPermute.cpp +++ b/src/runtime/NEON/functions/NEPermute.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEPermute.h" -#include "arm_compute/core/NEON/kernels/NEPermuteKernel.h" +#include "src/core/NEON/kernels/NEPermuteKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEPhase.cpp b/src/runtime/NEON/functions/NEPhase.cpp index 
85779611cd..3b6182a269 100644 --- a/src/runtime/NEON/functions/NEPhase.cpp +++ b/src/runtime/NEON/functions/NEPhase.cpp @@ -23,13 +23,13 @@ */ #include "arm_compute/runtime/NEON/functions/NEPhase.h" -#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h" +#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; - +namespace arm_compute +{ void NEPhase::configure(const ITensor *input1, const ITensor *input2, ITensor *output, PhaseType phase_type) { if(phase_type == PhaseType::UNSIGNED) @@ -45,3 +45,4 @@ void NEPhase::configure(const ITensor *input1, const ITensor *input2, ITensor *o _kernel = std::move(k); } } +} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp b/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp index 4208878b75..f7f4437554 100644 --- a/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp +++ b/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp @@ -24,7 +24,7 @@ #include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" +#include "src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEPoolingLayer.cpp b/src/runtime/NEON/functions/NEPoolingLayer.cpp index 81bd00d44d..12ac8d6d7d 100644 --- a/src/runtime/NEON/functions/NEPoolingLayer.cpp +++ b/src/runtime/NEON/functions/NEPoolingLayer.cpp @@ -25,8 +25,13 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEPoolingLayerKernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NEPoolingLayer::~NEPoolingLayer() = default; 
NEPoolingLayer::NEPoolingLayer() : _pooling_layer_kernel(), _border_handler(), _is_global_pooling_layer(false), _data_layout(DataLayout::NCHW) @@ -42,7 +47,8 @@ void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLay _data_layout = pool_info.data_layout == DataLayout::UNKNOWN ? input->info()->data_layout() : pool_info.data_layout; // Configure pooling kernel - _pooling_layer_kernel.configure(input, output, pool_info, indices); + _pooling_layer_kernel = arm_compute::support::cpp14::make_unique(); + _pooling_layer_kernel->configure(input, output, pool_info, indices); switch(_data_layout) { @@ -55,7 +61,8 @@ void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLay { zero_value = PixelValue(0, input->info()->data_type(), input->info()->quantization_info()); } - _border_handler.configure(input, _pooling_layer_kernel.border_size(), border_mode, zero_value); + _border_handler = arm_compute::support::cpp14::make_unique(); + _border_handler->configure(input, _pooling_layer_kernel->border_size(), border_mode, zero_value); break; } case DataLayout::NHWC: @@ -76,16 +83,18 @@ void NEPoolingLayer::run() { case DataLayout::NCHW: // Fill border - NEScheduler::get().schedule(&_border_handler, Window::DimY); + NEScheduler::get().schedule(_border_handler.get(), Window::DimY); // Run pooling layer - NEScheduler::get().schedule(&_pooling_layer_kernel, _is_global_pooling_layer ? Window::DimZ : Window::DimY); + NEScheduler::get().schedule(_pooling_layer_kernel.get(), _is_global_pooling_layer ? 
Window::DimZ : Window::DimY); break; case DataLayout::NHWC: // Run pooling layer - NEScheduler::get().schedule(&_pooling_layer_kernel, Window::DimX); + NEScheduler::get().schedule(_pooling_layer_kernel.get(), Window::DimX); break; default: ARM_COMPUTE_ERROR("Data layout not supported"); } } + +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEPriorBoxLayer.cpp b/src/runtime/NEON/functions/NEPriorBoxLayer.cpp index bcf6bef9c7..bfa06da04e 100644 --- a/src/runtime/NEON/functions/NEPriorBoxLayer.cpp +++ b/src/runtime/NEON/functions/NEPriorBoxLayer.cpp @@ -30,6 +30,7 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEPriorBoxLayerKernel.h" #include "support/MemorySupport.h" diff --git a/src/runtime/NEON/functions/NEQLSTMLayer.cpp b/src/runtime/NEON/functions/NEQLSTMLayer.cpp index e41962451c..1013730235 100644 --- a/src/runtime/NEON/functions/NEQLSTMLayer.cpp +++ b/src/runtime/NEON/functions/NEQLSTMLayer.cpp @@ -30,7 +30,16 @@ #include "arm_compute/core/utils/misc/InfoHelpers.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h" #include "src/core/helpers/WindowHelpers.h" +#include "support/MemorySupport.h" namespace arm_compute { @@ -47,6 +56,31 @@ Status validate_mm(GEMMLowpOutputStageInfo &gemmlowp_info, const ITensorInfo 
*mm } } // namespace +Status NEQLSTMLayer::validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias) +{ + // Output quantization scale will be different, but ignored here + // since it will be configured at configure() stage. + const TensorInfo out + { + in + }; + return NEQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias); +} + +void NEQLSTMLayer::configure_layer_norm(NEQLSTMLayer::LayerNormGate g, const ITensor *in) +{ + ARM_COMPUTE_ERROR_ON(!_has_layer_norm); + + Tensor &out = get_layer_norm_output(g); + _memory_group.manage(&out); + out.allocator()->init(*(in->info())); + + get_layer_norm(g) = arm_compute::support::cpp14::make_unique(); + get_layer_norm(g)->configure(in, &out, get_layer_norm_weight(g), get_layer_norm_bias(g)); +} + +NEQLSTMLayer::TensorCopyKernel::~TensorCopyKernel() = default; + Status NEQLSTMLayer::TensorCopyKernel::validate(const ITensorInfo &src, const ITensorInfo &dst) { ARM_COMPUTE_RETURN_ERROR_ON(src.tensor_shape().num_dimensions() > max_dimension_supported); @@ -77,7 +111,21 @@ void NEQLSTMLayer::TensorCopyKernel::run() input_iter, output_iter); } +NEQLSTMLayer::~NEQLSTMLayer() = default; + NEQLSTMLayer::NEQLSTMLayer(std::shared_ptr memory_manager) + : _memory_group(), _transpose_input_to_forget_weights(), _transpose_input_to_cell_weights(), _transpose_input_to_output_weights(), _transpose_input_to_input_weights(), + _transpose_recurrent_to_forget_weights(), _transpose_recurrent_to_cell_weights(), _transpose_recurrent_to_output_weights(), _transpose_recurrent_to_input_weights(), _transpose_projection_weights(), + _input_to_input_reduction(), _recurrent_to_input_reduction(), _input_to_forget_reduction(), _recurrent_to_forget_reduction(), _input_to_cell_reduction(), _recurrent_to_cell_reduction(), + _input_to_output_reduction(), _recurrent_to_output_reduction(), _projection_reduction(), _projection_bias_add(), _mm_input_to_forget(), _mm_recurrent_to_forget(), 
_pixelwise_mul_cell_to_forget(), + _input_to_forget_outstage(), _recurrent_to_forget_outstage(), _cell_to_forget_outstage(), _accumulate_input_recurrent_forget(), _accumulate_cell_forget(), _forget_gate_sigmoid(), _mm_input_to_cell(), + _input_to_cell_outstage(), _mm_recurrent_to_cell(), _recurrent_to_cell_outstage(), _accumulate_input_recurrent_modulation(), _cell_gate_tanh(), _input_gate_sub(), _mm_input_to_input(), + _input_to_input_outstage(), _mm_recurrent_to_input(), _recurrent_to_input_outstage(), _accumulate_input_recurrent_input(), _pixelwise_mul_cell_to_input(), _cell_to_input_outstage(), + _accumulate_cell_input(), _input_gate_sigmoid(), _pixelwise_mul_forget_cell(), _pixelwise_mul_input_cell(), _add_forget_cell(), _cell_clip(), _mm_input_to_output(), _input_to_output_outstage(), + _mm_recurrent_to_output(), _recurrent_to_output_outstage(), _accumulate_input_recurrent_output(), _pixelwise_mul_cell_to_output(), _cell_to_output_outstage(), _accumulate_cell_to_output(), + _output_gate_sigmoid(), _hidden_tanh(), _pixelwise_mul_hidden(), _hidden_outstage(), _mm_projection(), _projection_outstage(), _accumulate_projection(), _projection_clip(), _projection_bias_copy(), + _projection_output_to_accumulate_copy(), _projection_accumulate_to_output_copy(), _hidden_to_output_copy(), _layer_norms(), _copy_output(), _layer_norm_weights(), _layer_norm_bias(), + _layer_norm_output() { _memory_group = MemoryGroup(std::move(memory_manager)); } @@ -178,18 +226,29 @@ void NEQLSTMLayer::configure(const ITensor *input, _input_to_input_weights = lstm_params.input_to_input_weights(); _recurrent_to_input_weights = lstm_params.recurrent_to_input_weights(); - _input_to_input_reduction.configure(_input_to_input_weights, &_input_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); - _recurrent_to_input_reduction.configure(_recurrent_to_input_weights, &_recurrent_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, 
-qoutput_state_in.offset, true)); + _input_to_input_reduction = arm_compute::support::cpp14::make_unique(); + _recurrent_to_input_reduction = arm_compute::support::cpp14::make_unique(); + _input_to_input_reduction->configure(_input_to_input_weights, &_input_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); + _recurrent_to_input_reduction->configure(_recurrent_to_input_weights, &_recurrent_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)); } - _input_to_forget_reduction.configure(input_to_forget_weights, &_input_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); - _recurrent_to_forget_reduction.configure(recurrent_to_forget_weights, &_recurrent_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)); - _input_to_cell_reduction.configure(input_to_cell_weights, &_input_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); - _recurrent_to_cell_reduction.configure(recurrent_to_cell_weights, &_recurrent_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)); - _input_to_output_reduction.configure(input_to_output_weights, &_input_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); - _recurrent_to_output_reduction.configure(recurrent_to_output_weights, &_recurrent_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)); + + _input_to_forget_reduction = arm_compute::support::cpp14::make_unique(); + _recurrent_to_forget_reduction = arm_compute::support::cpp14::make_unique(); + _input_to_cell_reduction = arm_compute::support::cpp14::make_unique(); + _recurrent_to_cell_reduction = arm_compute::support::cpp14::make_unique(); + _input_to_output_reduction = arm_compute::support::cpp14::make_unique(); + _recurrent_to_output_reduction = 
arm_compute::support::cpp14::make_unique(); + + _input_to_forget_reduction->configure(input_to_forget_weights, &_input_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); + _recurrent_to_forget_reduction->configure(recurrent_to_forget_weights, &_recurrent_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)); + _input_to_cell_reduction->configure(input_to_cell_weights, &_input_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); + _recurrent_to_cell_reduction->configure(recurrent_to_cell_weights, &_recurrent_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)); + _input_to_output_reduction->configure(input_to_output_weights, &_input_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)); + _recurrent_to_output_reduction->configure(recurrent_to_output_weights, &_recurrent_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)); if(_has_projection) { - _projection_reduction.configure(_projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true)); + _projection_reduction = arm_compute::support::cpp14::make_unique(); + _projection_reduction->configure(_projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true)); if(_projection_bias != nullptr) { _projection_bias_add.configure(_projection_bias, &_projection_eff_bias, &_projection_eff_bias, ConvertPolicy::SATURATE); @@ -878,7 +937,7 @@ Status NEQLSTMLayer::validate(const ITensorInfo *input, ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output_state_out, output); } - ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(output_state_out, output)); + ARM_COMPUTE_RETURN_ON_ERROR(NECopy::validate(output_state_out, output)); return Status{}; }
@@ -906,7 +965,7 @@ void NEQLSTMLayer::run() if(_has_layer_norm) { - NEScheduler::get().schedule(&get_layer_norm(LayerNormGate::Forget), Window::DimY); + NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Forget).get(), Window::DimY); } _forget_gate_sigmoid.run(); @@ -921,7 +980,7 @@ void NEQLSTMLayer::run() if(_has_layer_norm) { - NEScheduler::get().schedule(&get_layer_norm(LayerNormGate::Cell), Window::DimY); + NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Cell).get(), Window::DimY); } _cell_gate_tanh.run(); @@ -948,7 +1007,7 @@ void NEQLSTMLayer::run() if(_has_layer_norm) { - NEScheduler::get().schedule(&get_layer_norm(LayerNormGate::Input), Window::DimY); + NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Input).get(), Window::DimY); } _input_gate_sigmoid.run(); @@ -979,7 +1038,7 @@ void NEQLSTMLayer::run() if(_has_layer_norm) { - NEScheduler::get().schedule(&get_layer_norm(LayerNormGate::Output), Window::DimY); + NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Output).get(), Window::DimY); } _output_gate_sigmoid.run(); @@ -1021,7 +1080,7 @@ void NEQLSTMLayer::run() } // Copy output_state_out to output - NEScheduler::get().schedule(&_copy_output, Window::DimY); + _copy_output.run(); } void NEQLSTMLayer::prepare() @@ -1051,8 +1110,8 @@ void NEQLSTMLayer::prepare() { _input_to_input_eff_bias.allocator()->allocate(); _recurrent_to_input_eff_bias.allocator()->allocate(); - NEScheduler::get().schedule(&_input_to_input_reduction, Window::DimY); - NEScheduler::get().schedule(&_recurrent_to_input_reduction, Window::DimY); + NEScheduler::get().schedule(_input_to_input_reduction.get(), Window::DimY); + NEScheduler::get().schedule(_recurrent_to_input_reduction.get(), Window::DimY); _input_to_input_weights_transposed.allocator()->allocate(); _recurrent_to_input_weights_transposed.allocator()->allocate(); @@ -1067,17 +1126,17 @@ void NEQLSTMLayer::prepare() _recurrent_to_cell_eff_bias.allocator()->allocate(); 
_input_to_output_eff_bias.allocator()->allocate(); _recurrent_to_output_eff_bias.allocator()->allocate(); - NEScheduler::get().schedule(&_input_to_forget_reduction, Window::DimY); - NEScheduler::get().schedule(&_recurrent_to_forget_reduction, Window::DimY); - NEScheduler::get().schedule(&_input_to_cell_reduction, Window::DimY); - NEScheduler::get().schedule(&_recurrent_to_cell_reduction, Window::DimY); - NEScheduler::get().schedule(&_input_to_output_reduction, Window::DimY); - NEScheduler::get().schedule(&_recurrent_to_output_reduction, Window::DimY); + NEScheduler::get().schedule(_input_to_forget_reduction.get(), Window::DimY); + NEScheduler::get().schedule(_recurrent_to_forget_reduction.get(), Window::DimY); + NEScheduler::get().schedule(_input_to_cell_reduction.get(), Window::DimY); + NEScheduler::get().schedule(_recurrent_to_cell_reduction.get(), Window::DimY); + NEScheduler::get().schedule(_input_to_output_reduction.get(), Window::DimY); + NEScheduler::get().schedule(_recurrent_to_output_reduction.get(), Window::DimY); if(_has_projection) { _projection_eff_bias.allocator()->allocate(); - NEScheduler::get().schedule(&_projection_reduction, Window::DimY); + NEScheduler::get().schedule(_projection_reduction.get(), Window::DimY); if(_projection_bias != nullptr) { _projection_bias_add.run(); @@ -1106,5 +1165,4 @@ void NEQLSTMLayer::prepare() _is_prepared = true; } } - } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEQuantizationLayer.cpp b/src/runtime/NEON/functions/NEQuantizationLayer.cpp index c042705a72..a20ffb8858 100644 --- a/src/runtime/NEON/functions/NEQuantizationLayer.cpp +++ b/src/runtime/NEON/functions/NEQuantizationLayer.cpp @@ -26,6 +26,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/kernels/NEQuantizationLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NERNNLayer.cpp b/src/runtime/NEON/functions/NERNNLayer.cpp 
index b7415bd44c..a8e10482a7 100644 --- a/src/runtime/NEON/functions/NERNNLayer.cpp +++ b/src/runtime/NEON/functions/NERNNLayer.cpp @@ -30,9 +30,24 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" +#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "src/core/NEON/kernels/NECopyKernel.h" +#include "src/core/NEON/kernels/NEFlattenLayerKernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NERNNLayer::~NERNNLayer() = default; + NERNNLayer::NERNNLayer(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_f(), _activation(), _fully_connected(memory_manager), _copy_kernel(), _fully_connected_out(), _gemm_output(), _add_output(), _is_prepared(false) @@ -99,7 +114,8 @@ void NERNNLayer::configure(const ITensor *input, const ITensor *weights, const I _activation.configure(&_add_output, hidden_state, info); _add_output.allocator()->allocate(); - _copy_kernel.configure(hidden_state, output); + _copy_kernel = arm_compute::support::cpp14::make_unique(); + _copy_kernel->configure(hidden_state, output); } void NERNNLayer::run() @@ -116,7 +132,7 @@ void NERNNLayer::run() _activation.run(); // copy hidden out to output - NEScheduler::get().schedule(&_copy_kernel, Window::DimY); + 
NEScheduler::get().schedule(_copy_kernel.get(), Window::DimY); } void NERNNLayer::prepare() diff --git a/src/runtime/NEON/functions/NEROIAlignLayer.cpp b/src/runtime/NEON/functions/NEROIAlignLayer.cpp index a3b116a55e..a046140551 100644 --- a/src/runtime/NEON/functions/NEROIAlignLayer.cpp +++ b/src/runtime/NEON/functions/NEROIAlignLayer.cpp @@ -23,7 +23,8 @@ */ #include "arm_compute/runtime/NEON/functions/NEROIAlignLayer.h" -#include "arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEROIAlignLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEROIPoolingLayer.cpp b/src/runtime/NEON/functions/NEROIPoolingLayer.cpp index 4aecadbc09..8bcf152881 100644 --- a/src/runtime/NEON/functions/NEROIPoolingLayer.cpp +++ b/src/runtime/NEON/functions/NEROIPoolingLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,11 +24,14 @@ #include "arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEROIPoolingLayerKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NEROIPoolingLayer::~NEROIPoolingLayer() = default; + NEROIPoolingLayer::NEROIPoolingLayer() : _roi_kernel() { @@ -36,11 +39,12 @@ NEROIPoolingLayer::NEROIPoolingLayer() void NEROIPoolingLayer::configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info) { - _roi_kernel.configure(input, rois, output, pool_info); + _roi_kernel = arm_compute::support::cpp14::make_unique(); + _roi_kernel->configure(input, rois, output, pool_info); } void NEROIPoolingLayer::run() { - NEScheduler::get().schedule(&_roi_kernel, Window::DimX); + NEScheduler::get().schedule(_roi_kernel.get(), Window::DimX); } } // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NERange.cpp b/src/runtime/NEON/functions/NERange.cpp index 138b458fab..ba166b2d58 100644 --- a/src/runtime/NEON/functions/NERange.cpp +++ b/src/runtime/NEON/functions/NERange.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,9 +24,13 @@ #include "arm_compute/runtime/NEON/functions/NERange.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NERangeKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NERange::~NERange() = default; + NERange::NERange() : _kernel() { @@ -34,7 +38,8 @@ NERange::NERange() void NERange::configure(ITensor *output, const float start, const float end, const float step) { - _kernel.configure(output, start, end, step); + _kernel = arm_compute::support::cpp14::make_unique(); + _kernel->configure(output, start, end, step); } Status NERange::validate(const ITensorInfo *output, const float start, const float end, const float step) @@ -44,6 +49,6 @@ Status NERange::validate(const ITensorInfo *output, const float start, const flo void NERange::run() { - NEScheduler::get().schedule(&_kernel, Window::DimX); + NEScheduler::get().schedule(_kernel.get(), Window::DimX); } } // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEReduceMean.cpp b/src/runtime/NEON/functions/NEReduceMean.cpp index c3c5529c09..b50a925f44 100644 --- a/src/runtime/NEON/functions/NEReduceMean.cpp +++ b/src/runtime/NEON/functions/NEReduceMean.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "src/core/CPP/Validate.h" +#include "src/core/NEON/kernels/NEReductionOperationKernel.h" #include "src/core/helpers/AutoConfiguration.h" namespace arm_compute @@ -96,6 +97,8 @@ Status validate_config(const ITensorInfo *input, const Coordinates &reduction_ax } } // namespace +NEReduceMean::~NEReduceMean() = default; + NEReduceMean::NEReduceMean(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(), _dequant(), _requant(), _reduction_ops(), _keep_dims(), _do_requant(), _input_no_quant(), _output_no_quant() diff --git 
a/src/runtime/NEON/functions/NEReductionOperation.cpp b/src/runtime/NEON/functions/NEReductionOperation.cpp index 4938a56b3f..463b65ec28 100644 --- a/src/runtime/NEON/functions/NEReductionOperation.cpp +++ b/src/runtime/NEON/functions/NEReductionOperation.cpp @@ -26,7 +26,9 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEReductionOperationKernel.h" #include "src/core/helpers/AutoConfiguration.h" +#include "support/MemorySupport.h" namespace arm_compute { @@ -54,6 +56,8 @@ size_t reduction_window_split_dimension(unsigned int axis) } } // namespace +NEReductionOperation::~NEReductionOperation() = default; + NEReductionOperation::NEReductionOperation(std::shared_ptr memory_manager) : _memory_group(memory_manager), _reduction_kernel(), _reshape(), _output_internal(), _window_split(0), _reduction_axis(), _is_reshape_required(false) { @@ -125,7 +129,8 @@ void NEReductionOperation::configure(ITensor *input, ITensor *output, unsigned i ARM_COMPUTE_ERROR_THROW_ON(NEReductionOperation::validate(input->info(), output->info(), axis, op, keep_dims)); // Configure reduction kernel - _reduction_kernel.configure(input, output_internal, axis, op); + _reduction_kernel = arm_compute::support::cpp14::make_unique(); + _reduction_kernel->configure(input, output_internal, axis, op); _window_split = reduction_window_split_dimension(axis); _reduction_axis = axis; @@ -139,7 +144,7 @@ void NEReductionOperation::configure(ITensor *input, ITensor *output, unsigned i void NEReductionOperation::run() { MemoryGroupResourceScope scope_mg(_memory_group); - NEScheduler::get().schedule(&_reduction_kernel, _window_split); + NEScheduler::get().schedule(_reduction_kernel.get(), _window_split); if(_is_reshape_required) { _reshape.run(); diff --git a/src/runtime/NEON/functions/NERemap.cpp b/src/runtime/NEON/functions/NERemap.cpp index d4e7f838c6..9276d49cf5 100644 --- 
a/src/runtime/NEON/functions/NERemap.cpp +++ b/src/runtime/NEON/functions/NERemap.cpp @@ -25,17 +25,18 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NERemapKernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NERemapKernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; - +namespace arm_compute +{ void NERemap::configure(ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); @@ -45,9 +46,11 @@ void NERemap::configure(ITensor *input, const ITensor *map_x, const ITensor *map ARM_COMPUTE_ERROR_ON_MSG(policy == InterpolationPolicy::AREA, "Area interpolation is not supported"); auto k = arm_compute::support::cpp14::make_unique(); - k->configure(input, map_x, map_y, output, policy); - _kernel = std::move(k); - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEReorgLayer.cpp b/src/runtime/NEON/functions/NEReorgLayer.cpp index dfe002a503..77ec7fbfb1 100644 --- a/src/runtime/NEON/functions/NEReorgLayer.cpp +++ b/src/runtime/NEON/functions/NEReorgLayer.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEReorgLayer.h" -#include "arm_compute/core/NEON/kernels/NEReorgLayerKernel.h" +#include "src/core/NEON/kernels/NEReorgLayerKernel.h" #include 
"support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEReshapeLayer.cpp b/src/runtime/NEON/functions/NEReshapeLayer.cpp index c1c88c1c7a..915d5d408f 100644 --- a/src/runtime/NEON/functions/NEReshapeLayer.cpp +++ b/src/runtime/NEON/functions/NEReshapeLayer.cpp @@ -23,10 +23,10 @@ */ #include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" -#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/Types.h" +#include "src/core/NEON/kernels/NEReshapeLayerKernel.h" #include "support/MemorySupport.h" #include @@ -35,6 +35,8 @@ namespace arm_compute { namespace experimental { +NEReshape::~NEReshape() = default; + void NEReshape::configure(const ITensorInfo *input, ITensorInfo *output) { auto k = arm_compute::support::cpp14::make_unique(); diff --git a/src/runtime/NEON/functions/NEReverse.cpp b/src/runtime/NEON/functions/NEReverse.cpp index c60c84e897..3ed0688386 100644 --- a/src/runtime/NEON/functions/NEReverse.cpp +++ b/src/runtime/NEON/functions/NEReverse.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEReverse.h" -#include "arm_compute/core/NEON/kernels/NEReverseKernel.h" +#include "src/core/NEON/kernels/NEReverseKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEScale.cpp b/src/runtime/NEON/functions/NEScale.cpp index bbf8343c2b..0290fe5a01 100644 --- a/src/runtime/NEON/functions/NEScale.cpp +++ b/src/runtime/NEON/functions/NEScale.cpp @@ -32,6 +32,7 @@ #include "arm_compute/core/Window.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEScaleKernel.h" #include "src/core/utils/ScaleUtils.h" diff --git a/src/runtime/NEON/functions/NEScharr3x3.cpp b/src/runtime/NEON/functions/NEScharr3x3.cpp index bf787e1440..cea0eefdb0 100644 --- 
a/src/runtime/NEON/functions/NEScharr3x3.cpp +++ b/src/runtime/NEON/functions/NEScharr3x3.cpp @@ -23,8 +23,9 @@ */ #include "arm_compute/runtime/NEON/functions/NEScharr3x3.h" -#include "arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEScharr3x3Kernel.h" #include "support/MemorySupport.h" #include @@ -36,5 +37,8 @@ void NEScharr3x3::configure(ITensor *input, ITensor *output_x, ITensor *output_y auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } diff --git a/src/runtime/NEON/functions/NESelect.cpp b/src/runtime/NEON/functions/NESelect.cpp index 8def123c5d..0d1f490767 100644 --- a/src/runtime/NEON/functions/NESelect.cpp +++ b/src/runtime/NEON/functions/NESelect.cpp @@ -23,8 +23,8 @@ */ #include "arm_compute/runtime/NEON/functions/NESelect.h" -#include "arm_compute/core/NEON/kernels/NESelectKernel.h" #include "arm_compute/core/Types.h" +#include "src/core/NEON/kernels/NESelectKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NESlice.cpp b/src/runtime/NEON/functions/NESlice.cpp index 2bacf2ee2a..dd56eaba8b 100644 --- a/src/runtime/NEON/functions/NESlice.cpp +++ b/src/runtime/NEON/functions/NESlice.cpp @@ -24,10 +24,10 @@ #include "arm_compute/runtime/NEON/functions/NESlice.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include 
"arm_compute/core/utils/helpers/tensor_transform.h" +#include "src/core/NEON/kernels/NEStridedSliceKernel.h" #include "support/MemorySupport.h" diff --git a/src/runtime/NEON/functions/NESobel3x3.cpp b/src/runtime/NEON/functions/NESobel3x3.cpp index cfd68d70af..38d2dc227e 100644 --- a/src/runtime/NEON/functions/NESobel3x3.cpp +++ b/src/runtime/NEON/functions/NESobel3x3.cpp @@ -23,18 +23,23 @@ */ #include "arm_compute/runtime/NEON/functions/NESobel3x3.h" -#include "arm_compute/core/NEON/kernels/NESobel3x3Kernel.h" #include "arm_compute/core/PixelValue.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NESobel3x3Kernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; - +namespace arm_compute +{ void NESobel3x3::configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } +} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NESobel5x5.cpp b/src/runtime/NEON/functions/NESobel5x5.cpp index 092c510bcf..e631fb3ed7 100644 --- a/src/runtime/NEON/functions/NESobel5x5.cpp +++ b/src/runtime/NEON/functions/NESobel5x5.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -29,8 +29,13 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NESobel5x5Kernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NESobel5x5::~NESobel5x5() = default; NESobel5x5::NESobel5x5(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _sobel_hor(), _sobel_vert(), _tmp_x(), _tmp_y(), _border_handler() @@ -46,14 +51,18 @@ void NESobel5x5::configure(ITensor *input, ITensor *output_x, ITensor *output_y, TensorInfo tensor_info(input->info()->tensor_shape(), Format::S16); + _sobel_hor = arm_compute::support::cpp14::make_unique(); + _sobel_vert = arm_compute::support::cpp14::make_unique(); + _border_handler = arm_compute::support::cpp14::make_unique(); + if(run_sobel_x && run_sobel_y) { _tmp_x.allocator()->init(tensor_info); _tmp_y.allocator()->init(tensor_info); _memory_group.manage(&_tmp_x); _memory_group.manage(&_tmp_y); - _sobel_hor.configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED); - _sobel_vert.configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED); + _sobel_hor->configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED); + _sobel_vert->configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED); _tmp_x.allocator()->allocate(); _tmp_y.allocator()->allocate(); } @@ -61,28 +70,29 @@ void NESobel5x5::configure(ITensor *input, ITensor *output_x, ITensor *output_y, { _tmp_x.allocator()->init(tensor_info); _memory_group.manage(&_tmp_x); - _sobel_hor.configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED); - _sobel_vert.configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED); + _sobel_hor->configure(input, &_tmp_x, nullptr, border_mode == 
BorderMode::UNDEFINED); + _sobel_vert->configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED); _tmp_x.allocator()->allocate(); } else if(run_sobel_y) { _tmp_y.allocator()->init(tensor_info); _memory_group.manage(&_tmp_y); - _sobel_hor.configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED); - _sobel_vert.configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED); + _sobel_hor->configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED); + _sobel_vert->configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED); _tmp_y.allocator()->allocate(); } - _border_handler.configure(input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler->configure(input, _sobel_hor->border_size(), border_mode, PixelValue(constant_border_value)); } void NESobel5x5::run() { - NEScheduler::get().schedule(&_border_handler, Window::DimZ); + NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); MemoryGroupResourceScope scope_mg(_memory_group); - NEScheduler::get().schedule(&_sobel_hor, Window::DimY); - NEScheduler::get().schedule(&_sobel_vert, Window::DimY); + NEScheduler::get().schedule(_sobel_hor.get(), Window::DimY); + NEScheduler::get().schedule(_sobel_vert.get(), Window::DimY); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NESobel7x7.cpp b/src/runtime/NEON/functions/NESobel7x7.cpp index 87ec81f7b0..bc5f87c1ec 100644 --- a/src/runtime/NEON/functions/NESobel7x7.cpp +++ b/src/runtime/NEON/functions/NESobel7x7.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -29,8 +29,13 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NESobel7x7Kernel.h" +#include "support/MemorySupport.h" -using namespace arm_compute; +namespace arm_compute +{ +NESobel7x7::~NESobel7x7() = default; NESobel7x7::NESobel7x7(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _sobel_hor(), _sobel_vert(), _tmp_x(), _tmp_y(), _border_handler() @@ -45,6 +50,9 @@ void NESobel7x7::configure(ITensor *input, ITensor *output_x, ITensor *output_y, const bool run_sobel_y = output_y != nullptr; TensorInfo tensor_info(input->info()->tensor_shape(), Format::S32); + _sobel_hor = arm_compute::support::cpp14::make_unique(); + _sobel_vert = arm_compute::support::cpp14::make_unique(); + _border_handler = arm_compute::support::cpp14::make_unique(); if(run_sobel_x && run_sobel_y) { @@ -52,8 +60,8 @@ void NESobel7x7::configure(ITensor *input, ITensor *output_x, ITensor *output_y, _tmp_y.allocator()->init(tensor_info); _memory_group.manage(&_tmp_x); _memory_group.manage(&_tmp_y); - _sobel_hor.configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED); - _sobel_vert.configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED); + _sobel_hor->configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED); + _sobel_vert->configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED); _tmp_x.allocator()->allocate(); _tmp_y.allocator()->allocate(); } @@ -61,28 +69,29 @@ void NESobel7x7::configure(ITensor *input, ITensor *output_x, ITensor *output_y, { _tmp_x.allocator()->init(tensor_info); _memory_group.manage(&_tmp_x); - _sobel_hor.configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED); - _sobel_vert.configure(&_tmp_x, nullptr, output_x, nullptr, 
border_mode == BorderMode::UNDEFINED); + _sobel_hor->configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED); + _sobel_vert->configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED); _tmp_x.allocator()->allocate(); } else if(run_sobel_y) { _tmp_y.allocator()->init(tensor_info); _memory_group.manage(&_tmp_y); - _sobel_hor.configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED); - _sobel_vert.configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED); + _sobel_hor->configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED); + _sobel_vert->configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED); _tmp_y.allocator()->allocate(); } - _border_handler.configure(input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler->configure(input, _sobel_hor->border_size(), border_mode, PixelValue(constant_border_value)); } void NESobel7x7::run() { - NEScheduler::get().schedule(&_border_handler, Window::DimZ); + NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); MemoryGroupResourceScope scope_mg(_memory_group); - NEScheduler::get().schedule(&_sobel_hor, Window::DimY); - NEScheduler::get().schedule(&_sobel_vert, Window::DimY); + NEScheduler::get().schedule(_sobel_hor.get(), Window::DimY); + NEScheduler::get().schedule(_sobel_vert.get(), Window::DimY); } +} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NESoftmaxLayer.cpp b/src/runtime/NEON/functions/NESoftmaxLayer.cpp index 4f773861d2..e79ab0ee2d 100644 --- a/src/runtime/NEON/functions/NESoftmaxLayer.cpp +++ b/src/runtime/NEON/functions/NESoftmaxLayer.cpp @@ -24,13 +24,19 @@ #include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include 
"arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NESoftmaxLayerKernel.h" +#include "src/core/NEON/kernels/NESoftmaxLayerKernel.h" #include "src/core/helpers/SoftmaxHelpers.h" +#include "support/MemorySupport.h" namespace arm_compute { +template +NESoftmaxLayerGeneric::~NESoftmaxLayerGeneric() = default; + template NESoftmaxLayerGeneric::NESoftmaxLayerGeneric(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _permute_input(), _permute_output(), _max_kernel(), _softmax_kernel(), _fill_border_kernel(), _max(), _tmp(), _input_permuted(), _output_permuted(), @@ -76,15 +82,17 @@ void NESoftmaxLayerGeneric::configure(ITensor *input, ITensor *output, f _memory_group.manage(&_max); _memory_group.manage(&_tmp); - // Configure Kernels - _max_kernel.configure(tmp_input, &_max); + // Configure kernels + _max_kernel = arm_compute::support::cpp14::make_unique(); + _softmax_kernel = arm_compute::support::cpp14::make_unique>(); + _max_kernel->configure(tmp_input, &_max); if(_needs_permute) { // Add to the memory manager _output_permuted _memory_group.manage(&_output_permuted); // The normalization kernel stores the result in a permuted output tensor - _softmax_kernel.configure(tmp_input, &_max, &_output_permuted, beta, &_tmp); + _softmax_kernel->configure(tmp_input, &_max, &_output_permuted, beta, &_tmp); _input_permuted.allocator()->allocate(); // Re-permute the permuted output into the requested (4D) output @@ -96,8 +104,9 @@ void NESoftmaxLayerGeneric::configure(ITensor *input, ITensor *output, f else { // Softmax 2D case - _fill_border_kernel.configure(tmp_input, _max_kernel.border_size(), BorderMode::REPLICATE); - _softmax_kernel.configure(tmp_input, &_max, output, beta, &_tmp); + _fill_border_kernel = arm_compute::support::cpp14::make_unique(); + _fill_border_kernel->configure(tmp_input, _max_kernel->border_size(), BorderMode::REPLICATE); + 
_softmax_kernel->configure(tmp_input, &_max, output, beta, &_tmp); } // Allocate intermediate buffers @@ -152,10 +161,13 @@ void NESoftmaxLayerGeneric::run() { _permute_input.run(); } + else + { + NEScheduler::get().schedule(_fill_border_kernel.get(), Window::DimY); + } - NEScheduler::get().schedule(&_fill_border_kernel, Window::DimY); - NEScheduler::get().schedule(&_max_kernel, Window::DimY); - NEScheduler::get().schedule(&_softmax_kernel, Window::DimY); + NEScheduler::get().schedule(_max_kernel.get(), Window::DimY); + NEScheduler::get().schedule(_softmax_kernel.get(), Window::DimY); if(_needs_permute) { diff --git a/src/runtime/NEON/functions/NESpaceToBatchLayer.cpp b/src/runtime/NEON/functions/NESpaceToBatchLayer.cpp index 97e793f6fb..516e8d604c 100644 --- a/src/runtime/NEON/functions/NESpaceToBatchLayer.cpp +++ b/src/runtime/NEON/functions/NESpaceToBatchLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,9 +29,14 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEMemsetKernel.h" +#include "src/core/NEON/kernels/NESpaceToBatchLayerKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NESpaceToBatchLayer::~NESpaceToBatchLayer() = default; + NESpaceToBatchLayer::NESpaceToBatchLayer() : _space_to_batch_kernel(), _memset_kernel(), _has_padding(false) { @@ -43,10 +48,12 @@ void NESpaceToBatchLayer::configure(const ITensor *input, const ITensor *block_s if(input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size()) { - _has_padding = true; - _memset_kernel.configure(output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info())); + _has_padding = true; + _memset_kernel = arm_compute::support::cpp14::make_unique(); + _memset_kernel->configure(output, PixelValue(0, input->info()->data_type(), 
input->info()->quantization_info())); } - _space_to_batch_kernel.configure(input, block_shape, paddings, output); + _space_to_batch_kernel = arm_compute::support::cpp14::make_unique(); + _space_to_batch_kernel->configure(input, block_shape, paddings, output); } void NESpaceToBatchLayer::configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ITensor *output) @@ -55,10 +62,12 @@ void NESpaceToBatchLayer::configure(const ITensor *input, const int block_shape_ if(input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size()) { - _has_padding = true; - _memset_kernel.configure(output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info())); + _has_padding = true; + _memset_kernel = arm_compute::support::cpp14::make_unique(); + _memset_kernel->configure(output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info())); } - _space_to_batch_kernel.configure(input, block_shape_x, block_shape_y, padding_left, padding_right, output); + _space_to_batch_kernel = arm_compute::support::cpp14::make_unique(); + _space_to_batch_kernel->configure(input, block_shape_x, block_shape_y, padding_left, padding_right, output); } Status NESpaceToBatchLayer::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output) @@ -81,8 +90,8 @@ void NESpaceToBatchLayer::run() // Zero out output only if we have paddings if(_has_padding) { - NEScheduler::get().schedule(&_memset_kernel, Window::DimY); + NEScheduler::get().schedule(_memset_kernel.get(), Window::DimY); } - NEScheduler::get().schedule(&_space_to_batch_kernel, Window::DimY); + NEScheduler::get().schedule(_space_to_batch_kernel.get(), Window::DimY); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NESpaceToDepthLayer.cpp b/src/runtime/NEON/functions/NESpaceToDepthLayer.cpp index 3e1ec80687..a834600199 100644 
--- a/src/runtime/NEON/functions/NESpaceToDepthLayer.cpp +++ b/src/runtime/NEON/functions/NESpaceToDepthLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,9 +29,13 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NESpaceToDepthLayerKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NESpaceToDepthLayer::~NESpaceToDepthLayer() = default; + NESpaceToDepthLayer::NESpaceToDepthLayer() : _space_to_depth_kernel() { @@ -40,7 +44,8 @@ NESpaceToDepthLayer::NESpaceToDepthLayer() void NESpaceToDepthLayer::configure(const ITensor *input, ITensor *output, int32_t block_shape) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - _space_to_depth_kernel.configure(input, output, block_shape); + _space_to_depth_kernel = arm_compute::support::cpp14::make_unique(); + _space_to_depth_kernel->configure(input, output, block_shape); } Status NESpaceToDepthLayer::validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape) @@ -51,6 +56,6 @@ Status NESpaceToDepthLayer::validate(const ITensorInfo *input, const ITensorInfo void NESpaceToDepthLayer::run() { - NEScheduler::get().schedule(&_space_to_depth_kernel, Window::DimY); + NEScheduler::get().schedule(_space_to_depth_kernel.get(), Window::DimY); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEStackLayer.cpp b/src/runtime/NEON/functions/NEStackLayer.cpp index a99a95ab2a..e38ff6bee7 100644 --- a/src/runtime/NEON/functions/NEStackLayer.cpp +++ b/src/runtime/NEON/functions/NEStackLayer.cpp @@ -30,9 +30,13 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/NEStackLayerKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { 
+NEStackLayer::~NEStackLayer() = default; + NEStackLayer::NEStackLayer() // NOLINT : _input(), _stack_kernels(), @@ -50,7 +54,8 @@ void NEStackLayer::configure(const std::vector &input, int axis, ITen for(unsigned int i = 0; i < _num_inputs; i++) { - _stack_kernels[i].configure(input[i], axis_u, i, _num_inputs, output); + _stack_kernels[i] = arm_compute::support::cpp14::make_unique(); + _stack_kernels[i]->configure(input[i], axis_u, i, _num_inputs, output); } } @@ -80,7 +85,7 @@ void NEStackLayer::run() { for(unsigned i = 0; i < _num_inputs; i++) { - NEScheduler::get().schedule(&_stack_kernels[i], Window::DimY); + NEScheduler::get().schedule(_stack_kernels[i].get(), Window::DimY); } } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEStridedSlice.cpp b/src/runtime/NEON/functions/NEStridedSlice.cpp index 8bf81e8270..308b856ec6 100644 --- a/src/runtime/NEON/functions/NEStridedSlice.cpp +++ b/src/runtime/NEON/functions/NEStridedSlice.cpp @@ -24,8 +24,8 @@ #include "arm_compute/runtime/NEON/functions/NEStridedSlice.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h" #include "arm_compute/core/Types.h" +#include "src/core/NEON/kernels/NEStridedSliceKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NETableLookup.cpp b/src/runtime/NEON/functions/NETableLookup.cpp index b8d765f76b..9295bf0ece 100644 --- a/src/runtime/NEON/functions/NETableLookup.cpp +++ b/src/runtime/NEON/functions/NETableLookup.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NETableLookup.h" -#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h" +#include "src/core/NEON/kernels/NETableLookupKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEThreshold.cpp b/src/runtime/NEON/functions/NEThreshold.cpp index e21511ed65..2f1e3047b5 100644 --- a/src/runtime/NEON/functions/NEThreshold.cpp +++ 
b/src/runtime/NEON/functions/NEThreshold.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEThreshold.h" -#include "arm_compute/core/NEON/kernels/NEThresholdKernel.h" +#include "src/core/NEON/kernels/NEThresholdKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NETile.cpp b/src/runtime/NEON/functions/NETile.cpp index 6fda3a5ba6..6a1e20ddf8 100644 --- a/src/runtime/NEON/functions/NETile.cpp +++ b/src/runtime/NEON/functions/NETile.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NETile.h" -#include "arm_compute/core/NEON/kernels/NETileKernel.h" +#include "src/core/NEON/kernels/NETileKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NETranspose.cpp b/src/runtime/NEON/functions/NETranspose.cpp index 88d1672173..5af417f4ed 100644 --- a/src/runtime/NEON/functions/NETranspose.cpp +++ b/src/runtime/NEON/functions/NETranspose.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NETranspose.h" -#include "arm_compute/core/NEON/kernels/NETransposeKernel.h" +#include "src/core/NEON/kernels/NETransposeKernel.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEUpsampleLayer.cpp b/src/runtime/NEON/functions/NEUpsampleLayer.cpp index 58c050f904..aae58387e2 100644 --- a/src/runtime/NEON/functions/NEUpsampleLayer.cpp +++ b/src/runtime/NEON/functions/NEUpsampleLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,10 +23,13 @@ */ #include "arm_compute/runtime/NEON/functions/NEUpsampleLayer.h" -#include "arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h" +#include "src/core/NEON/kernels/NEUpsampleLayerKernel.h" +#include "support/MemorySupport.h" namespace arm_compute { +NEUpsampleLayer::~NEUpsampleLayer() = default; + NEUpsampleLayer::NEUpsampleLayer() : _kernel(), _data_layout() { @@ -41,12 +44,13 @@ Status NEUpsampleLayer::validate(const ITensorInfo *input, const ITensorInfo *ou void NEUpsampleLayer::configure(const ITensor *input, ITensor *output, const Size2D &info, const InterpolationPolicy &policy) { _data_layout = input->info()->data_layout(); - _kernel.configure(input, output, info, policy); + _kernel = arm_compute::support::cpp14::make_unique(); + _kernel->configure(input, output, info, policy); } void NEUpsampleLayer::run() { const auto win = (_data_layout == DataLayout::NCHW) ? Window::DimZ : Window::DimX; - NEScheduler::get().schedule(&_kernel, win); + NEScheduler::get().schedule(_kernel.get(), win); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEWarpAffine.cpp b/src/runtime/NEON/functions/NEWarpAffine.cpp index ec2c6883ba..b5dbfe0d5c 100644 --- a/src/runtime/NEON/functions/NEWarpAffine.cpp +++ b/src/runtime/NEON/functions/NEWarpAffine.cpp @@ -24,8 +24,9 @@ #include "arm_compute/runtime/NEON/functions/NEWarpAffine.h" #include "arm_compute/core/Error.h" -#include "arm_compute/core/NEON/kernels/NEWarpKernel.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEWarpKernel.h" #include "support/MemorySupport.h" #include @@ -58,5 +59,7 @@ void NEWarpAffine::configure(ITensor *input, ITensor *output, const std::arrayborder_size(), border_mode, constant_border_value); + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, constant_border_value); + _border_handler = 
std::move(b); } diff --git a/src/runtime/NEON/functions/NEWarpPerspective.cpp b/src/runtime/NEON/functions/NEWarpPerspective.cpp index bf361b8ab9..8d42121005 100644 --- a/src/runtime/NEON/functions/NEWarpPerspective.cpp +++ b/src/runtime/NEON/functions/NEWarpPerspective.cpp @@ -24,14 +24,15 @@ #include "arm_compute/runtime/NEON/functions/NEWarpPerspective.h" #include "arm_compute/core/Error.h" -#include "arm_compute/core/NEON/kernels/NEWarpKernel.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEWarpKernel.h" #include "support/MemorySupport.h" #include -using namespace arm_compute; - +namespace arm_compute +{ void NEWarpPerspective::configure(ITensor *input, ITensor *output, const std::array &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); @@ -58,5 +59,8 @@ void NEWarpPerspective::configure(ITensor *input, ITensor *output, const std::ar ARM_COMPUTE_ERROR("Interpolation type not supported"); } - _border_handler.configure(input, _kernel->border_size(), border_mode, constant_border_value); + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, constant_border_value); + _border_handler = std::move(b); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp index 23b9f60c38..1cb2458e13 100644 --- a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp @@ -30,6 +30,10 @@ #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "src/core/CPP/Validate.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" 
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h" #include "support/MemorySupport.h" diff --git a/src/runtime/NEON/functions/NEYOLOLayer.cpp b/src/runtime/NEON/functions/NEYOLOLayer.cpp index 233afb727a..5cad53bffd 100644 --- a/src/runtime/NEON/functions/NEYOLOLayer.cpp +++ b/src/runtime/NEON/functions/NEYOLOLayer.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEYOLOLayer.h" -#include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h" +#include "src/core/NEON/kernels/NEYOLOLayerKernel.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/TracePoint.cpp b/src/runtime/TracePoint.cpp index a4228b2b21..6cb672c348 100644 --- a/src/runtime/TracePoint.cpp +++ b/src/runtime/TracePoint.cpp @@ -25,10 +25,10 @@ #include #include -#include "arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp" #include "arm_compute/runtime/Array.h" #include "arm_compute/runtime/Pyramid.h" #include "arm_compute/runtime/common/LSTMParams.h" +#include "src/core/NEON/kernels/assembly/arm_gemm.hpp" #include "utils/TypePrinter.h" namespace arm_compute diff --git a/tests/NEON/Helper.h b/tests/NEON/Helper.h index d1ae37ec78..ea47a416b1 100644 --- a/tests/NEON/Helper.h +++ b/tests/NEON/Helper.h @@ -26,6 +26,8 @@ #include "arm_compute/runtime/Array.h" #include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" #include "support/MemorySupport.h" #include "tests/Globals.h" @@ -52,7 +54,7 @@ void fill_tensors(D &&dist, std::initializer_list seeds, T &&tensor, Ts &&. 
/** This template synthetizes an INESimpleFunction which runs the given kernel K */ template -class NESynthetizeFunction : public INESimpleFunction +class NESynthetizeFunction : public INESimpleFunctionNoBorder { public: /** Configure the kernel. @@ -93,7 +95,10 @@ public: auto k = arm_compute::support::cpp14::make_unique(); k->configure(first, std::forward(args)...); _kernel = std::move(k); - _border_handler.configure(first, BorderSize(bordersize), BorderMode::CONSTANT, PixelValue()); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(first, BorderSize(bordersize), BorderMode::CONSTANT, PixelValue()); + _border_handler = std::move(b); } }; @@ -113,7 +118,10 @@ public: auto k = arm_compute::support::cpp14::make_unique(); k->configure(first, std::forward(args)...); _kernel = std::move(k); - _border_handler.configure(first, BorderSize(_kernel->border_size()), BorderMode::CONSTANT, PixelValue()); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(first, BorderSize(_kernel->border_size()), BorderMode::CONSTANT, PixelValue()); + _border_handler = std::move(b); } }; diff --git a/tests/validation/NEON/DepthwiseConvolutionLayerNative.cpp b/tests/validation/NEON/DepthwiseConvolutionLayerNative.cpp index 47551355bb..d379ce728e 100644 --- a/tests/validation/NEON/DepthwiseConvolutionLayerNative.cpp +++ b/tests/validation/NEON/DepthwiseConvolutionLayerNative.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h" +#include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h" #include "tests/NEON/Accessor.h" #include "tests/NEON/Helper.h" #include "tests/framework/Macros.h" diff --git a/tests/validation/NEON/FillBorder.cpp b/tests/validation/NEON/FillBorder.cpp index b567b3f9b6..343ad831e4 100644 --- a/tests/validation/NEON/FillBorder.cpp +++ b/tests/validation/NEON/FillBorder.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" #include "tests/Globals.h" #include "tests/NEON/Accessor.h" #include "tests/datasets/BorderModeDataset.h" diff --git a/tests/validation/NEON/GEMM.cpp b/tests/validation/NEON/GEMM.cpp index 25e8f28dc3..2d8c61164b 100644 --- a/tests/validation/NEON/GEMM.cpp +++ b/tests/validation/NEON/GEMM.cpp @@ -21,12 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "tests/NEON/Accessor.h" #include "tests/NEON/Helper.h" #include "tests/PaddingCalculator.h" diff --git a/tests/validation/NEON/QLSTMLayerNormalization.cpp b/tests/validation/NEON/QLSTMLayerNormalization.cpp index f3cd5fbb56..8925d0b39e 100644 --- a/tests/validation/NEON/QLSTMLayerNormalization.cpp +++ b/tests/validation/NEON/QLSTMLayerNormalization.cpp @@ -21,10 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h" #include "tests/NEON/Accessor.h" #include "tests/NEON/Helper.h" #include "tests/PaddingCalculator.h" -- cgit v1.2.1