From ebcebf1dee7f8314976b1e0cabd62b4cf893d765 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Wed, 21 Oct 2020 00:04:14 +0100 Subject: COMPMID-3638: Move NEON kernels Signed-off-by: Michalis Spyrou Change-Id: Ieed3e4bc8be7fef80c90c5094599b477a56fc473 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4285 Comments-Addressed: Arm Jenkins Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins --- src/core/NEON/INEKernel.h | 34 +++ src/core/NEON/INESimpleKernel.h | 34 +++ src/core/NEON/NEKernels.h | 151 +++++++++++ src/core/NEON/NETracePoint.cpp | 2 +- .../NEON/kernels/NEAbsoluteDifferenceKernel.cpp | 2 +- src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h | 86 ++++++ src/core/NEON/kernels/NEAccumulateKernel.cpp | 8 +- src/core/NEON/kernels/NEAccumulateKernel.h | 183 +++++++++++++ src/core/NEON/kernels/NEActivationLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEActivationLayerKernel.h | 87 ++++++ .../NEON/kernels/NEArithmeticAdditionKernel.cpp | 2 +- src/core/NEON/kernels/NEArithmeticAdditionKernel.h | 106 ++++++++ .../NEON/kernels/NEArithmeticSubtractionKernel.cpp | 2 +- .../NEON/kernels/NEArithmeticSubtractionKernel.h | 118 ++++++++ .../NEON/kernels/NEBatchConcatenateLayerKernel.cpp | 2 +- .../NEON/kernels/NEBatchConcatenateLayerKernel.h | 89 ++++++ .../kernels/NEBatchNormalizationLayerKernel.cpp | 2 +- .../NEON/kernels/NEBatchNormalizationLayerKernel.h | 139 ++++++++++ .../NEON/kernels/NEBatchToSpaceLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h | 101 +++++++ src/core/NEON/kernels/NEBitwiseAndKernel.cpp | 2 +- src/core/NEON/kernels/NEBitwiseAndKernel.h | 74 +++++ src/core/NEON/kernels/NEBitwiseNotKernel.cpp | 2 +- src/core/NEON/kernels/NEBitwiseNotKernel.h | 72 +++++ src/core/NEON/kernels/NEBitwiseOrKernel.cpp | 2 +- src/core/NEON/kernels/NEBitwiseOrKernel.h | 74 +++++ src/core/NEON/kernels/NEBitwiseXorKernel.cpp | 2 +- src/core/NEON/kernels/NEBitwiseXorKernel.h | 74 +++++ .../NEON/kernels/NEBoundingBoxTransformKernel.cpp | 
2 +- .../NEON/kernels/NEBoundingBoxTransformKernel.h | 95 +++++++ src/core/NEON/kernels/NEBox3x3Kernel.cpp | 4 +- src/core/NEON/kernels/NEBox3x3Kernel.h | 95 +++++++ src/core/NEON/kernels/NECannyEdgeKernel.cpp | 15 +- src/core/NEON/kernels/NECannyEdgeKernel.h | 189 +++++++++++++ src/core/NEON/kernels/NEChannelCombineKernel.cpp | 2 +- src/core/NEON/kernels/NEChannelCombineKernel.h | 129 +++++++++ src/core/NEON/kernels/NEChannelExtractKernel.cpp | 4 +- src/core/NEON/kernels/NEChannelExtractKernel.h | 111 ++++++++ .../NEON/kernels/NEChannelShuffleLayerKernel.cpp | 2 +- .../NEON/kernels/NEChannelShuffleLayerKernel.h | 80 ++++++ src/core/NEON/kernels/NECol2ImKernel.cpp | 2 +- src/core/NEON/kernels/NECol2ImKernel.h | 115 ++++++++ src/core/NEON/kernels/NEColorConvertKernel.cpp | 2 +- src/core/NEON/kernels/NEColorConvertKernel.h | 93 +++++++ .../NEConvertFullyConnectedWeightsKernel.cpp | 2 +- .../kernels/NEConvertFullyConnectedWeightsKernel.h | 95 +++++++ .../kernels/NEConvertQuantizedSignednessKernel.cpp | 2 +- .../kernels/NEConvertQuantizedSignednessKernel.h | 78 ++++++ src/core/NEON/kernels/NEConvolutionKernel.cpp | 2 +- src/core/NEON/kernels/NEConvolutionKernel.h | 299 +++++++++++++++++++++ src/core/NEON/kernels/NECopyKernel.cpp | 2 +- src/core/NEON/kernels/NECopyKernel.h | 80 ++++++ src/core/NEON/kernels/NECropKernel.cpp | 2 +- src/core/NEON/kernels/NECropKernel.h | 114 ++++++++ .../kernels/NECumulativeDistributionKernel.cpp | 2 +- .../NEON/kernels/NECumulativeDistributionKernel.h | 85 ++++++ .../NEON/kernels/NEDepthConcatenateLayerKernel.cpp | 2 +- .../NEON/kernels/NEDepthConcatenateLayerKernel.h | 89 ++++++ .../NEON/kernels/NEDepthConvertLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEDepthConvertLayerKernel.h | 96 +++++++ .../NEON/kernels/NEDepthToSpaceLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h | 81 ++++++ .../NEDepthwiseConvolutionLayerNativeKernel.cpp | 2 +- .../NEDepthwiseConvolutionLayerNativeKernel.h | 131 +++++++++ 
.../NEON/kernels/NEDequantizationLayerKernel.cpp | 2 +- .../NEON/kernels/NEDequantizationLayerKernel.h | 76 ++++++ src/core/NEON/kernels/NEDerivativeKernel.cpp | 2 +- src/core/NEON/kernels/NEDerivativeKernel.h | 100 +++++++ src/core/NEON/kernels/NEDilateKernel.cpp | 4 +- src/core/NEON/kernels/NEDilateKernel.h | 65 +++++ .../kernels/NEDirectConvolutionLayerKernel.cpp | 2 +- .../NEON/kernels/NEDirectConvolutionLayerKernel.h | 108 ++++++++ .../NEDirectConvolutionLayerOutputStageKernel.cpp | 2 +- .../NEDirectConvolutionLayerOutputStageKernel.h | 102 +++++++ .../NEON/kernels/NEElementwiseOperationKernel.cpp | 2 +- .../NEON/kernels/NEElementwiseOperationKernel.h | 214 +++++++++++++++ src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp | 2 +- src/core/NEON/kernels/NEElementwiseUnaryKernel.h | 103 +++++++ src/core/NEON/kernels/NEErodeKernel.cpp | 4 +- src/core/NEON/kernels/NEErodeKernel.h | 65 +++++ src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp | 2 +- src/core/NEON/kernels/NEFFTDigitReverseKernel.h | 93 +++++++ src/core/NEON/kernels/NEFFTRadixStageKernel.cpp | 2 +- src/core/NEON/kernels/NEFFTRadixStageKernel.h | 103 +++++++ src/core/NEON/kernels/NEFFTScaleKernel.cpp | 2 +- src/core/NEON/kernels/NEFFTScaleKernel.h | 84 ++++++ src/core/NEON/kernels/NEFastCornersKernel.cpp | 2 +- src/core/NEON/kernels/NEFastCornersKernel.h | 78 ++++++ src/core/NEON/kernels/NEFillArrayKernel.cpp | 2 +- src/core/NEON/kernels/NEFillArrayKernel.h | 77 ++++++ src/core/NEON/kernels/NEFillBorderKernel.cpp | 3 +- src/core/NEON/kernels/NEFillBorderKernel.h | 82 ++++++ src/core/NEON/kernels/NEFlattenLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEFlattenLayerKernel.h | 81 ++++++ src/core/NEON/kernels/NEFloorKernel.cpp | 4 +- src/core/NEON/kernels/NEFloorKernel.h | 72 +++++ .../kernels/NEFuseBatchNormalizationKernel.cpp | 2 +- .../NEON/kernels/NEFuseBatchNormalizationKernel.h | 116 ++++++++ src/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h | 89 ++++++ .../NEON/kernels/NEGEMMInterleave4x4Kernel.cpp | 4 
+- src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h | 112 ++++++++ .../kernels/NEGEMMLowpMatrixMultiplyKernel.cpp | 2 +- .../NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h | 92 +++++++ .../kernels/NEGEMMLowpOffsetContributionKernel.cpp | 2 +- .../kernels/NEGEMMLowpOffsetContributionKernel.h | 105 ++++++++ ...GEMMLowpOffsetContributionOutputStageKernel.cpp | 2 +- ...NEGEMMLowpOffsetContributionOutputStageKernel.h | 135 ++++++++++ .../NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp | 2 +- .../NEGEMMLowpQuantizeDownInt32ScaleKernel.h | 114 ++++++++ ...tizeDownInt32ToInt16ScaleByFixedPointKernel.cpp | 2 +- ...antizeDownInt32ToInt16ScaleByFixedPointKernel.h | 118 ++++++++ ...ntizeDownInt32ToInt8ScaleByFixedPointKernel.cpp | 2 +- ...uantizeDownInt32ToInt8ScaleByFixedPointKernel.h | 121 +++++++++ ...tizeDownInt32ToUint8ScaleByFixedPointKernel.cpp | 2 +- ...antizeDownInt32ToUint8ScaleByFixedPointKernel.h | 121 +++++++++ .../NEON/kernels/NEGEMMLowpReductionKernel.cpp | 2 +- src/core/NEON/kernels/NEGEMMLowpReductionKernel.h | 196 ++++++++++++++ .../NEON/kernels/NEGEMMMatrixAdditionKernel.cpp | 2 +- src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h | 98 +++++++ .../NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp | 2 +- src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h | 94 +++++++ src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp | 4 +- src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h | 107 ++++++++ src/core/NEON/kernels/NEGatherKernel.cpp | 2 +- src/core/NEON/kernels/NEGatherKernel.h | 113 ++++++++ src/core/NEON/kernels/NEGaussian3x3Kernel.cpp | 4 +- src/core/NEON/kernels/NEGaussian3x3Kernel.h | 66 +++++ src/core/NEON/kernels/NEGaussian5x5Kernel.cpp | 8 +- src/core/NEON/kernels/NEGaussian5x5Kernel.h | 103 +++++++ src/core/NEON/kernels/NEGaussianPyramidKernel.cpp | 4 +- src/core/NEON/kernels/NEGaussianPyramidKernel.h | 105 ++++++++ .../kernels/NEGenerateProposalsLayerKernel.cpp | 2 +- .../NEON/kernels/NEGenerateProposalsLayerKernel.h | 85 ++++++ 
src/core/NEON/kernels/NEHOGDescriptorKernel.cpp | 2 +- src/core/NEON/kernels/NEHOGDescriptorKernel.h | 149 ++++++++++ src/core/NEON/kernels/NEHOGDetectorKernel.cpp | 2 +- src/core/NEON/kernels/NEHOGDetectorKernel.h | 89 ++++++ src/core/NEON/kernels/NEHarrisCornersKernel.cpp | 2 +- src/core/NEON/kernels/NEHarrisCornersKernel.h | 105 ++++++++ .../kernels/NEHeightConcatenateLayerKernel.cpp | 2 +- .../NEON/kernels/NEHeightConcatenateLayerKernel.h | 83 ++++++ src/core/NEON/kernels/NEHistogramKernel.cpp | 2 +- src/core/NEON/kernels/NEHistogramKernel.h | 135 ++++++++++ src/core/NEON/kernels/NEIm2ColKernel.cpp | 2 +- src/core/NEON/kernels/NEIm2ColKernel.h | 139 ++++++++++ .../kernels/NEInstanceNormalizationLayerKernel.cpp | 2 +- .../kernels/NEInstanceNormalizationLayerKernel.h | 96 +++++++ src/core/NEON/kernels/NEIntegralImageKernel.cpp | 2 +- src/core/NEON/kernels/NEIntegralImageKernel.h | 66 +++++ src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEL2NormalizeLayerKernel.h | 90 +++++++ src/core/NEON/kernels/NELKTrackerKernel.cpp | 2 +- src/core/NEON/kernels/NELKTrackerKernel.h | 141 ++++++++++ .../NELocallyConnectedMatrixMultiplyKernel.cpp | 2 +- .../NELocallyConnectedMatrixMultiplyKernel.h | 79 ++++++ src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp | 2 +- src/core/NEON/kernels/NEMagnitudePhaseKernel.h | 101 +++++++ .../NEON/kernels/NEMaxUnpoolingLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h | 97 +++++++ src/core/NEON/kernels/NEMeanStdDevKernel.cpp | 2 +- src/core/NEON/kernels/NEMeanStdDevKernel.h | 83 ++++++ .../kernels/NEMeanStdDevNormalizationKernel.cpp | 2 +- .../NEON/kernels/NEMeanStdDevNormalizationKernel.h | 98 +++++++ src/core/NEON/kernels/NEMedian3x3Kernel.cpp | 4 +- src/core/NEON/kernels/NEMedian3x3Kernel.h | 66 +++++ src/core/NEON/kernels/NEMemsetKernel.cpp | 2 +- src/core/NEON/kernels/NEMemsetKernel.h | 71 +++++ src/core/NEON/kernels/NEMinMaxLayerKernel.cpp | 2 +- 
src/core/NEON/kernels/NEMinMaxLayerKernel.h | 90 +++++++ src/core/NEON/kernels/NEMinMaxLocationKernel.cpp | 2 +- src/core/NEON/kernels/NEMinMaxLocationKernel.h | 171 ++++++++++++ src/core/NEON/kernels/NENonLinearFilterKernel.cpp | 2 +- src/core/NEON/kernels/NENonLinearFilterKernel.h | 153 +++++++++++ .../kernels/NENonMaximaSuppression3x3Kernel.cpp | 2 +- .../NEON/kernels/NENonMaximaSuppression3x3Kernel.h | 108 ++++++++ .../NEON/kernels/NENormalizationLayerKernel.cpp | 2 +- src/core/NEON/kernels/NENormalizationLayerKernel.h | 108 ++++++++ src/core/NEON/kernels/NEPadLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEPadLayerKernel.h | 113 ++++++++ src/core/NEON/kernels/NEPermuteKernel.cpp | 4 +- src/core/NEON/kernels/NEPermuteKernel.h | 102 +++++++ .../kernels/NEPixelWiseMultiplicationKernel.cpp | 2 +- .../NEON/kernels/NEPixelWiseMultiplicationKernel.h | 186 +++++++++++++ src/core/NEON/kernels/NEPoolingLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEPoolingLayerKernel.h | 229 ++++++++++++++++ src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEPriorBoxLayerKernel.h | 100 +++++++ .../kernels/NEQLSTMLayerNormalizationKernel.cpp | 2 +- .../NEON/kernels/NEQLSTMLayerNormalizationKernel.h | 146 ++++++++++ .../NEON/kernels/NEQuantizationLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEQuantizationLayerKernel.h | 102 +++++++ src/core/NEON/kernels/NEROIAlignLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEROIAlignLayerKernel.h | 101 +++++++ src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEROIPoolingLayerKernel.h | 81 ++++++ src/core/NEON/kernels/NERangeKernel.cpp | 2 +- src/core/NEON/kernels/NERangeKernel.h | 90 +++++++ .../NEON/kernels/NEReductionOperationKernel.cpp | 4 +- src/core/NEON/kernels/NEReductionOperationKernel.h | 92 +++++++ src/core/NEON/kernels/NERemapKernel.cpp | 2 +- src/core/NEON/kernels/NERemapKernel.h | 83 ++++++ src/core/NEON/kernels/NEReorgLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEReorgLayerKernel.h | 
83 ++++++ src/core/NEON/kernels/NEReshapeLayerKernel.cpp | 4 +- src/core/NEON/kernels/NEReshapeLayerKernel.h | 75 ++++++ src/core/NEON/kernels/NEReverseKernel.cpp | 2 +- src/core/NEON/kernels/NEReverseKernel.h | 80 ++++++ src/core/NEON/kernels/NEScaleKernel.cpp | 2 +- src/core/NEON/kernels/NEScaleKernel.h | 127 +++++++++ src/core/NEON/kernels/NEScharr3x3Kernel.cpp | 2 +- src/core/NEON/kernels/NEScharr3x3Kernel.h | 86 ++++++ src/core/NEON/kernels/NESelectKernel.cpp | 2 +- src/core/NEON/kernels/NESelectKernel.h | 103 +++++++ src/core/NEON/kernels/NESobel3x3Kernel.cpp | 2 +- src/core/NEON/kernels/NESobel3x3Kernel.h | 86 ++++++ src/core/NEON/kernels/NESobel5x5Kernel.cpp | 2 +- src/core/NEON/kernels/NESobel5x5Kernel.h | 126 +++++++++ src/core/NEON/kernels/NESobel7x7Kernel.cpp | 2 +- src/core/NEON/kernels/NESobel7x7Kernel.h | 130 +++++++++ src/core/NEON/kernels/NESoftmaxLayerKernel.cpp | 2 +- src/core/NEON/kernels/NESoftmaxLayerKernel.h | 149 ++++++++++ .../NEON/kernels/NESpaceToBatchLayerKernel.cpp | 2 +- src/core/NEON/kernels/NESpaceToBatchLayerKernel.h | 111 ++++++++ .../NEON/kernels/NESpaceToDepthLayerKernel.cpp | 2 +- src/core/NEON/kernels/NESpaceToDepthLayerKernel.h | 81 ++++++ src/core/NEON/kernels/NEStackLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEStackLayerKernel.h | 93 +++++++ src/core/NEON/kernels/NEStridedSliceKernel.cpp | 2 +- src/core/NEON/kernels/NEStridedSliceKernel.h | 102 +++++++ src/core/NEON/kernels/NETableLookupKernel.cpp | 4 +- src/core/NEON/kernels/NETableLookupKernel.h | 82 ++++++ src/core/NEON/kernels/NEThresholdKernel.cpp | 2 +- src/core/NEON/kernels/NEThresholdKernel.h | 88 ++++++ src/core/NEON/kernels/NETileKernel.cpp | 2 +- src/core/NEON/kernels/NETileKernel.h | 78 ++++++ src/core/NEON/kernels/NETransposeKernel.cpp | 2 +- src/core/NEON/kernels/NETransposeKernel.h | 90 +++++++ src/core/NEON/kernels/NEUpsampleLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEUpsampleLayerKernel.h | 99 +++++++ src/core/NEON/kernels/NEWarpKernel.cpp | 2 +- 
src/core/NEON/kernels/NEWarpKernel.h | 131 +++++++++ src/core/NEON/kernels/NEWeightsReshapeKernel.cpp | 2 +- src/core/NEON/kernels/NEWeightsReshapeKernel.h | 109 ++++++++ .../NEON/kernels/NEWidthConcatenateLayerKernel.cpp | 2 +- .../NEON/kernels/NEWidthConcatenateLayerKernel.h | 82 ++++++ .../kernels/NEWinogradConvolutionLayerKernel.h | 2 +- src/core/NEON/kernels/NEYOLOLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEYOLOLayerKernel.h | 106 ++++++++ .../NEON/kernels/assembly/INEGEMMWrapperKernel.h | 2 +- .../NEDepthwiseConvolutionAssemblyKernelWrapper.h | 2 +- .../kernels/assembly/NEGEMMAssemblyWrapperKernel.h | 2 +- 251 files changed, 13262 insertions(+), 154 deletions(-) create mode 100644 src/core/NEON/INEKernel.h create mode 100644 src/core/NEON/INESimpleKernel.h create mode 100644 src/core/NEON/NEKernels.h create mode 100644 src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h create mode 100644 src/core/NEON/kernels/NEAccumulateKernel.h create mode 100644 src/core/NEON/kernels/NEActivationLayerKernel.h create mode 100644 src/core/NEON/kernels/NEArithmeticAdditionKernel.h create mode 100644 src/core/NEON/kernels/NEArithmeticSubtractionKernel.h create mode 100644 src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h create mode 100644 src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h create mode 100644 src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h create mode 100644 src/core/NEON/kernels/NEBitwiseAndKernel.h create mode 100644 src/core/NEON/kernels/NEBitwiseNotKernel.h create mode 100644 src/core/NEON/kernels/NEBitwiseOrKernel.h create mode 100644 src/core/NEON/kernels/NEBitwiseXorKernel.h create mode 100644 src/core/NEON/kernels/NEBoundingBoxTransformKernel.h create mode 100644 src/core/NEON/kernels/NEBox3x3Kernel.h create mode 100644 src/core/NEON/kernels/NECannyEdgeKernel.h create mode 100644 src/core/NEON/kernels/NEChannelCombineKernel.h create mode 100644 src/core/NEON/kernels/NEChannelExtractKernel.h create mode 100644 
src/core/NEON/kernels/NEChannelShuffleLayerKernel.h create mode 100644 src/core/NEON/kernels/NECol2ImKernel.h create mode 100644 src/core/NEON/kernels/NEColorConvertKernel.h create mode 100644 src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h create mode 100644 src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h create mode 100644 src/core/NEON/kernels/NEConvolutionKernel.h create mode 100644 src/core/NEON/kernels/NECopyKernel.h create mode 100644 src/core/NEON/kernels/NECropKernel.h create mode 100644 src/core/NEON/kernels/NECumulativeDistributionKernel.h create mode 100644 src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h create mode 100644 src/core/NEON/kernels/NEDepthConvertLayerKernel.h create mode 100644 src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h create mode 100644 src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h create mode 100644 src/core/NEON/kernels/NEDequantizationLayerKernel.h create mode 100644 src/core/NEON/kernels/NEDerivativeKernel.h create mode 100644 src/core/NEON/kernels/NEDilateKernel.h create mode 100644 src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h create mode 100644 src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h create mode 100644 src/core/NEON/kernels/NEElementwiseOperationKernel.h create mode 100644 src/core/NEON/kernels/NEElementwiseUnaryKernel.h create mode 100644 src/core/NEON/kernels/NEErodeKernel.h create mode 100644 src/core/NEON/kernels/NEFFTDigitReverseKernel.h create mode 100644 src/core/NEON/kernels/NEFFTRadixStageKernel.h create mode 100644 src/core/NEON/kernels/NEFFTScaleKernel.h create mode 100644 src/core/NEON/kernels/NEFastCornersKernel.h create mode 100644 src/core/NEON/kernels/NEFillArrayKernel.h create mode 100644 src/core/NEON/kernels/NEFillBorderKernel.h create mode 100644 src/core/NEON/kernels/NEFlattenLayerKernel.h create mode 100644 src/core/NEON/kernels/NEFloorKernel.h create mode 100644 src/core/NEON/kernels/NEFuseBatchNormalizationKernel.h 
create mode 100644 src/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h create mode 100644 src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMLowpReductionKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h create mode 100644 src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h create mode 100644 src/core/NEON/kernels/NEGatherKernel.h create mode 100644 src/core/NEON/kernels/NEGaussian3x3Kernel.h create mode 100644 src/core/NEON/kernels/NEGaussian5x5Kernel.h create mode 100644 src/core/NEON/kernels/NEGaussianPyramidKernel.h create mode 100644 src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h create mode 100644 src/core/NEON/kernels/NEHOGDescriptorKernel.h create mode 100644 src/core/NEON/kernels/NEHOGDetectorKernel.h create mode 100644 src/core/NEON/kernels/NEHarrisCornersKernel.h create mode 100644 src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h create mode 100644 src/core/NEON/kernels/NEHistogramKernel.h create mode 100644 src/core/NEON/kernels/NEIm2ColKernel.h create mode 100644 src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h create mode 100644 src/core/NEON/kernels/NEIntegralImageKernel.h create mode 100644 src/core/NEON/kernels/NEL2NormalizeLayerKernel.h create mode 100644 
src/core/NEON/kernels/NELKTrackerKernel.h create mode 100644 src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h create mode 100644 src/core/NEON/kernels/NEMagnitudePhaseKernel.h create mode 100644 src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h create mode 100644 src/core/NEON/kernels/NEMeanStdDevKernel.h create mode 100644 src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h create mode 100644 src/core/NEON/kernels/NEMedian3x3Kernel.h create mode 100644 src/core/NEON/kernels/NEMemsetKernel.h create mode 100644 src/core/NEON/kernels/NEMinMaxLayerKernel.h create mode 100644 src/core/NEON/kernels/NEMinMaxLocationKernel.h create mode 100644 src/core/NEON/kernels/NENonLinearFilterKernel.h create mode 100644 src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h create mode 100644 src/core/NEON/kernels/NENormalizationLayerKernel.h create mode 100644 src/core/NEON/kernels/NEPadLayerKernel.h create mode 100644 src/core/NEON/kernels/NEPermuteKernel.h create mode 100644 src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h create mode 100644 src/core/NEON/kernels/NEPoolingLayerKernel.h create mode 100644 src/core/NEON/kernels/NEPriorBoxLayerKernel.h create mode 100644 src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h create mode 100644 src/core/NEON/kernels/NEQuantizationLayerKernel.h create mode 100644 src/core/NEON/kernels/NEROIAlignLayerKernel.h create mode 100644 src/core/NEON/kernels/NEROIPoolingLayerKernel.h create mode 100644 src/core/NEON/kernels/NERangeKernel.h create mode 100644 src/core/NEON/kernels/NEReductionOperationKernel.h create mode 100644 src/core/NEON/kernels/NERemapKernel.h create mode 100644 src/core/NEON/kernels/NEReorgLayerKernel.h create mode 100644 src/core/NEON/kernels/NEReshapeLayerKernel.h create mode 100644 src/core/NEON/kernels/NEReverseKernel.h create mode 100644 src/core/NEON/kernels/NEScaleKernel.h create mode 100644 src/core/NEON/kernels/NEScharr3x3Kernel.h create mode 100644 src/core/NEON/kernels/NESelectKernel.h 
create mode 100644 src/core/NEON/kernels/NESobel3x3Kernel.h create mode 100644 src/core/NEON/kernels/NESobel5x5Kernel.h create mode 100644 src/core/NEON/kernels/NESobel7x7Kernel.h create mode 100644 src/core/NEON/kernels/NESoftmaxLayerKernel.h create mode 100644 src/core/NEON/kernels/NESpaceToBatchLayerKernel.h create mode 100644 src/core/NEON/kernels/NESpaceToDepthLayerKernel.h create mode 100644 src/core/NEON/kernels/NEStackLayerKernel.h create mode 100644 src/core/NEON/kernels/NEStridedSliceKernel.h create mode 100644 src/core/NEON/kernels/NETableLookupKernel.h create mode 100644 src/core/NEON/kernels/NEThresholdKernel.h create mode 100644 src/core/NEON/kernels/NETileKernel.h create mode 100644 src/core/NEON/kernels/NETransposeKernel.h create mode 100644 src/core/NEON/kernels/NEUpsampleLayerKernel.h create mode 100644 src/core/NEON/kernels/NEWarpKernel.h create mode 100644 src/core/NEON/kernels/NEWeightsReshapeKernel.h create mode 100644 src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h create mode 100644 src/core/NEON/kernels/NEYOLOLayerKernel.h (limited to 'src/core/NEON') diff --git a/src/core/NEON/INEKernel.h b/src/core/NEON/INEKernel.h new file mode 100644 index 0000000000..7ad20166d8 --- /dev/null +++ b/src/core/NEON/INEKernel.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_INEKERNEL_H +#define ARM_COMPUTE_INEKERNEL_H + +#include "arm_compute/core/CPP/ICPPKernel.h" + +namespace arm_compute +{ +/** Common interface for all kernels implemented in NEON. */ +using INEKernel = ICPPKernel; +} // namespace arm_compute +#endif /*ARM_COMPUTE_INEKERNEL_H */ diff --git a/src/core/NEON/INESimpleKernel.h b/src/core/NEON/INESimpleKernel.h new file mode 100644 index 0000000000..da32d6619e --- /dev/null +++ b/src/core/NEON/INESimpleKernel.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_INESIMPLEKERNEL_H +#define ARM_COMPUTE_INESIMPLEKERNEL_H + +#include "arm_compute/core/CPP/ICPPSimpleKernel.h" + +namespace arm_compute +{ +/** Interface for simple NEON kernels having 1 tensor input and 1 tensor output */ +using INESimpleKernel = ICPPSimpleKernel; +} // namespace arm_compute +#endif /*ARM_COMPUTE_INESIMPLEKERNEL_H */ diff --git a/src/core/NEON/NEKernels.h b/src/core/NEON/NEKernels.h new file mode 100644 index 0000000000..c1924d6739 --- /dev/null +++ b/src/core/NEON/NEKernels.h @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEKERNELS_H +#define ARM_COMPUTE_NEKERNELS_H + +/* Header regrouping all the NEON kernels */ +#include "src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h" +#include "src/core/NEON/kernels/NEAccumulateKernel.h" +#include "src/core/NEON/kernels/NEActivationLayerKernel.h" +#include "src/core/NEON/kernels/NEArithmeticAdditionKernel.h" +#include "src/core/NEON/kernels/NEArithmeticSubtractionKernel.h" +#include "src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" +#include "src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h" +#include "src/core/NEON/kernels/NEBitwiseAndKernel.h" +#include "src/core/NEON/kernels/NEBitwiseNotKernel.h" +#include "src/core/NEON/kernels/NEBitwiseOrKernel.h" +#include "src/core/NEON/kernels/NEBitwiseXorKernel.h" +#include "src/core/NEON/kernels/NEBoundingBoxTransformKernel.h" +#include "src/core/NEON/kernels/NEBox3x3Kernel.h" +#include "src/core/NEON/kernels/NECannyEdgeKernel.h" +#include "src/core/NEON/kernels/NEChannelCombineKernel.h" +#include "src/core/NEON/kernels/NEChannelExtractKernel.h" +#include "src/core/NEON/kernels/NEChannelShuffleLayerKernel.h" +#include "src/core/NEON/kernels/NECol2ImKernel.h" +#include "src/core/NEON/kernels/NEColorConvertKernel.h" +#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" +#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "src/core/NEON/kernels/NEConvolutionKernel.h" +#include "src/core/NEON/kernels/NECopyKernel.h" +#include "src/core/NEON/kernels/NECropKernel.h" +#include "src/core/NEON/kernels/NECumulativeDistributionKernel.h" +#include "src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h" +#include "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h" +#include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h" +#include "src/core/NEON/kernels/NEDequantizationLayerKernel.h" 
+#include "src/core/NEON/kernels/NEDerivativeKernel.h" +#include "src/core/NEON/kernels/NEDilateKernel.h" +#include "src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" +#include "src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" +#include "src/core/NEON/kernels/NEElementwiseOperationKernel.h" +#include "src/core/NEON/kernels/NEElementwiseUnaryKernel.h" +#include "src/core/NEON/kernels/NEErodeKernel.h" +#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h" +#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h" +#include "src/core/NEON/kernels/NEFFTScaleKernel.h" +#include "src/core/NEON/kernels/NEFastCornersKernel.h" +#include "src/core/NEON/kernels/NEFillArrayKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEFlattenLayerKernel.h" +#include "src/core/NEON/kernels/NEFloorKernel.h" +#include "src/core/NEON/kernels/NEFuseBatchNormalizationKernel.h" +#include "src/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "src/core/NEON/kernels/NEGatherKernel.h" +#include "src/core/NEON/kernels/NEGaussian3x3Kernel.h" 
+#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h" +#include "src/core/NEON/kernels/NEGaussianPyramidKernel.h" +#include "src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" +#include "src/core/NEON/kernels/NEHOGDescriptorKernel.h" +#include "src/core/NEON/kernels/NEHOGDetectorKernel.h" +#include "src/core/NEON/kernels/NEHarrisCornersKernel.h" +#include "src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEHistogramKernel.h" +#include "src/core/NEON/kernels/NEIm2ColKernel.h" +#include "src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h" +#include "src/core/NEON/kernels/NEIntegralImageKernel.h" +#include "src/core/NEON/kernels/NEL2NormalizeLayerKernel.h" +#include "src/core/NEON/kernels/NELKTrackerKernel.h" +#include "src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h" +#include "src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h" +#include "src/core/NEON/kernels/NEMeanStdDevKernel.h" +#include "src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h" +#include "src/core/NEON/kernels/NEMedian3x3Kernel.h" +#include "src/core/NEON/kernels/NEMemsetKernel.h" +#include "src/core/NEON/kernels/NEMinMaxLayerKernel.h" +#include "src/core/NEON/kernels/NEMinMaxLocationKernel.h" +#include "src/core/NEON/kernels/NENonLinearFilterKernel.h" +#include "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" +#include "src/core/NEON/kernels/NENormalizationLayerKernel.h" +#include "src/core/NEON/kernels/NEPadLayerKernel.h" +#include "src/core/NEON/kernels/NEPermuteKernel.h" +#include "src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" +#include "src/core/NEON/kernels/NEPoolingLayerKernel.h" +#include "src/core/NEON/kernels/NEPriorBoxLayerKernel.h" +#include "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h" +#include "src/core/NEON/kernels/NEQuantizationLayerKernel.h" +#include "src/core/NEON/kernels/NEROIAlignLayerKernel.h" +#include 
"src/core/NEON/kernels/NEROIPoolingLayerKernel.h" +#include "src/core/NEON/kernels/NERangeKernel.h" +#include "src/core/NEON/kernels/NEReductionOperationKernel.h" +#include "src/core/NEON/kernels/NERemapKernel.h" +#include "src/core/NEON/kernels/NEReorgLayerKernel.h" +#include "src/core/NEON/kernels/NEReshapeLayerKernel.h" +#include "src/core/NEON/kernels/NEReverseKernel.h" +#include "src/core/NEON/kernels/NEScaleKernel.h" +#include "src/core/NEON/kernels/NEScharr3x3Kernel.h" +#include "src/core/NEON/kernels/NESelectKernel.h" +#include "src/core/NEON/kernels/NESobel3x3Kernel.h" +#include "src/core/NEON/kernels/NESobel5x5Kernel.h" +#include "src/core/NEON/kernels/NESobel7x7Kernel.h" +#include "src/core/NEON/kernels/NESoftmaxLayerKernel.h" +#include "src/core/NEON/kernels/NESpaceToBatchLayerKernel.h" +#include "src/core/NEON/kernels/NESpaceToDepthLayerKernel.h" +#include "src/core/NEON/kernels/NEStackLayerKernel.h" +#include "src/core/NEON/kernels/NEStridedSliceKernel.h" +#include "src/core/NEON/kernels/NETableLookupKernel.h" +#include "src/core/NEON/kernels/NEThresholdKernel.h" +#include "src/core/NEON/kernels/NETileKernel.h" +#include "src/core/NEON/kernels/NETransposeKernel.h" +#include "src/core/NEON/kernels/NEUpsampleLayerKernel.h" +#include "src/core/NEON/kernels/NEWarpKernel.h" +#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h" +#include "src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEYOLOLayerKernel.h" + +#endif /* ARM_COMPUTE_NEKERNELS_H */ diff --git a/src/core/NEON/NETracePoint.cpp b/src/core/NEON/NETracePoint.cpp index 4a6bffa54e..bf48b411ec 100644 --- a/src/core/NEON/NETracePoint.cpp +++ b/src/core/NEON/NETracePoint.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/core/TracePoint.h" -#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" +#include "src/core/NEON/kernels/NELKTrackerKernel.h" #include "src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h" #include 
"src/core/NEON/kernels/convolution/common/convolution.hpp" #include "utils/TypePrinter.h" diff --git a/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp b/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp index acea0af02d..a6a41b8af9 100644 --- a/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp +++ b/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h" +#include "src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h b/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h new file mode 100644 index 0000000000..cc95172f35 --- /dev/null +++ b/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H +#define ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the absolute difference kernel + * + * Absolute difference is computed by: + * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f] + */ +class NEAbsoluteDifferenceKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEAbsoluteDifferenceKernel"; + } + /** Default constructor */ + NEAbsoluteDifferenceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAbsoluteDifferenceKernel(const NEAbsoluteDifferenceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAbsoluteDifferenceKernel &operator=(const NEAbsoluteDifferenceKernel &) = delete; + /** Allow instances of this class to be moved */ + NEAbsoluteDifferenceKernel(NEAbsoluteDifferenceKernel &&) = default; + /** Allow instances of this class to be moved */ + NEAbsoluteDifferenceKernel &operator=(NEAbsoluteDifferenceKernel &&) = default; + /** Default destructor */ + ~NEAbsoluteDifferenceKernel() = default; + + /** Set the inputs and output tensors + * + * @param[in] input1 Source tensor. Data types supported: U8/S16 + * @param[in] input2 Source tensor. 
Data types supported: U8/S16 + * @param[out] output Destination tensor. Data types supported: U8/S16 + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialised absolute difference functions + * + * @param[in] input1 An input tensor. Data types supported: U8/S16. + * @param[in] input2 An input tensor. Data types supported: U8/S16. + * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16. + * @param[in] window Region on which to execute the kernel. + */ + using AbsDiffFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); + + /** Absolute difference function to use for the particular tensor formats passed to configure() */ + AbsDiffFunction *_func; + const ITensor *_input1; + const ITensor *_input2; + ITensor *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEAccumulateKernel.cpp b/src/core/NEON/kernels/NEAccumulateKernel.cpp index 73ef7eb66f..46179cadcb 100644 --- a/src/core/NEON/kernels/NEAccumulateKernel.cpp +++ b/src/core/NEON/kernels/NEAccumulateKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEAccumulateKernel.h" +#include "src/core/NEON/kernels/NEAccumulateKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" @@ -33,13 +33,8 @@ #include -using namespace arm_compute; - namespace arm_compute { -class Coordinates; -} // namespace arm_compute - /* Max S16 value used for saturation purposes. 
*/ const static uint16x8_t max_int_u16 = vdupq_n_u16(static_cast(INT16_MAX)); @@ -361,3 +356,4 @@ void NEAccumulateSquaredKernel::run(const Window &window, const ThreadInfo &info }, input, accum); } +} // namespace arm_compute \ No newline at end of file diff --git a/src/core/NEON/kernels/NEAccumulateKernel.h b/src/core/NEON/kernels/NEAccumulateKernel.h new file mode 100644 index 0000000000..af1298f53f --- /dev/null +++ b/src/core/NEON/kernels/NEAccumulateKernel.h @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEACCUMULATEKERNEL_H +#define ARM_COMPUTE_NEACCUMULATEKERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Interface for the accumulate kernel + * + * Accumulation is computed by: + * @f[ accum(x,y) = accum(x,y) + input(x,y) @f] + */ +class NEAccumulateKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEAccumulateKernel"; + } + /** Default constructor */ + NEAccumulateKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateKernel(const NEAccumulateKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateKernel &operator=(const NEAccumulateKernel &) = delete; + /** Allow instances of this class to be moved */ + NEAccumulateKernel(NEAccumulateKernel &&) = default; + /** Allow instances of this class to be moved */ + NEAccumulateKernel &operator=(NEAccumulateKernel &&) = default; + /** Default destructor */ + ~NEAccumulateKernel() = default; + /** Set the input and accumulation tensors + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] accum Destination tensor. Data type supported: S16. 
+ */ + void configure(const ITensor *input, ITensor *accum); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; +}; + +/** Interface for the accumulate weighted kernel + * + * Weighted accumulation is computed: + * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f] + * + * Where @f$ 0 \le \alpha \le 1 @f$ + * Conceptually, the rounding for this is defined as: + * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f] +*/ +class NEAccumulateWeightedKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEAccumulateWeightedKernel"; + } + /** Default constructor */ + NEAccumulateWeightedKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateWeightedKernel(const NEAccumulateWeightedKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateWeightedKernel &operator=(const NEAccumulateWeightedKernel &) = delete; + /** Allow instances of this class to be moved */ + NEAccumulateWeightedKernel(NEAccumulateWeightedKernel &&) = default; + /** Allow instances of this class to be moved */ + NEAccumulateWeightedKernel &operator=(NEAccumulateWeightedKernel &&) = default; + /** Default destructor */ + ~NEAccumulateWeightedKernel() = default; + /** Set the input and accumulation tensors, and the scale value + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[in] alpha Scalar value in the range [0.0f, 1.0f] + * @param[in,out] accum Accumulated tensor. Data type supported: U8. 
+ */ + void configure(const ITensor *input, float alpha, ITensor *accum); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +protected: + float _alpha; +}; + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +/** Interface for the accumulate weighted kernel using F16 */ +class NEAccumulateWeightedFP16Kernel : public NEAccumulateWeightedKernel +{ +public: + const char *name() const override + { + return "NEAccumulateWeightedFP16Kernel"; + } + /** Default constructor */ + NEAccumulateWeightedFP16Kernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateWeightedFP16Kernel(const NEAccumulateWeightedFP16Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateWeightedFP16Kernel &operator=(const NEAccumulateWeightedFP16Kernel &) = delete; + /** Allow instances of this class to be moved */ + NEAccumulateWeightedFP16Kernel(NEAccumulateWeightedFP16Kernel &&) = default; + /** Allow instances of this class to be moved */ + NEAccumulateWeightedFP16Kernel &operator=(NEAccumulateWeightedFP16Kernel &&) = default; + /** Default destructor */ + ~NEAccumulateWeightedFP16Kernel() = default; + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; +}; +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +/** Interface for the accumulate weighted kernel using F16 */ +using NEAccumulateWeightedFP16Kernel = NEAccumulateWeightedKernel; +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + +/** Interface for the accumulate squared kernel + * + * The accumulation of squares is computed: + * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f] + * + * Where @f$ 0 \le shift \le 15 @f$ +*/ +class NEAccumulateSquaredKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return 
"NEAccumulateSquaredKernel"; + } + /** Default constructor */ + NEAccumulateSquaredKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateSquaredKernel(const NEAccumulateSquaredKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateSquaredKernel &operator=(const NEAccumulateSquaredKernel &) = delete; + /** Allow instances of this class to be moved */ + NEAccumulateSquaredKernel(NEAccumulateSquaredKernel &&) = default; + /** Allow instances of this class to be moved */ + NEAccumulateSquaredKernel &operator=(NEAccumulateSquaredKernel &&) = default; + /** Default destructor */ + ~NEAccumulateSquaredKernel() = default; + /** Set the input and accumulation tensors and the shift value. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[in] shift Shift value in the range of [0, 15] + * @param[in,out] accum Accumulated tensor. Data type supported: S16. + */ + void configure(const ITensor *input, uint32_t shift, ITensor *accum); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + uint32_t _shift; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEACCUMULATEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp index f61f048a87..51257cb96b 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" +#include "src/core/NEON/kernels/NEActivationLayerKernel.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.h b/src/core/NEON/kernels/NEActivationLayerKernel.h new file mode 100644 index 0000000000..783783c6ab --- /dev/null +++ b/src/core/NEON/kernels/NEActivationLayerKernel.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H +#define ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H + +#include "arm_compute/core/utils/misc/Traits.h" +#include "src/core/NEON/INEKernel.h" + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#include +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the activation layer kernel. */ +class NEActivationLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEActivationLayerKernel"; + } + /** Constructor */ + NEActivationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEActivationLayerKernel(const NEActivationLayerKernel &) = delete; + /** Default move constructor */ + NEActivationLayerKernel(NEActivationLayerKernel &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEActivationLayerKernel &operator=(const NEActivationLayerKernel &) = delete; + /** Default move assignment operator */ + NEActivationLayerKernel &operator=(NEActivationLayerKernel &&) = default; + /** Default destructor */ + ~NEActivationLayerKernel() = default; + /** Set the input and output tensor. + * + * @note If the output tensor is a nullptr, the activation function will be performed in-place + * + * @param[in, out] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. + * @param[out] output Destination tensor info. Data type supported: same as @p input + * @param[in] activation_info Activation layer information. + */ + void configure(const ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo activation_info); + /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayerKernel + * + * @param[in] input Source tensor info. 
In case of @p output tensor info = nullptr, this tensor will store the result + * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. + * @param[in] output Destination tensor info. Data type supported: same as @p input + * @param[in] act_info Activation layer information. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + +private: + ActivationLayerInfo _act_info; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp index 7f1a35fb55..fa26b903f1 100644 --- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" +#include "src/core/NEON/kernels/NEArithmeticAdditionKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.h b/src/core/NEON/kernels/NEArithmeticAdditionKernel.h new file mode 100644 index 0000000000..2072ad91bd --- /dev/null +++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H +#define ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform addition between two tensors */ +class NEArithmeticAdditionKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEArithmeticAdditionKernel"; + } + /** Default constructor */ + NEArithmeticAdditionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticAdditionKernel(const NEArithmeticAdditionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticAdditionKernel &operator=(const NEArithmeticAdditionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEArithmeticAdditionKernel(NEArithmeticAdditionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEArithmeticAdditionKernel &operator=(NEArithmeticAdditionKernel &&) = default; + /** Default destructor */ + ~NEArithmeticAdditionKernel() = default; + + /** Initialise the kernel's input, output and border mode. + * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (S16,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,S16) -> S16 + * - (S32,S32) -> S32 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * - (QASYMM8,QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (QSYMM16,QSYMM16) -> QSYMM16 + * + * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 + * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 + * @param[out] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. 
+ * @param[in] policy Overflow policy. + */ + void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy); + /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAdditionKernel + * + * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 + * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 + * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. + * @param[in] policy Overflow policy. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialised add functions + * + * @param[in] input1 First input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 + * @param[in] input2 Second input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 + * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. + * @param[in] policy Overflow policy. + * @param[in] window Region on which to execute the kernel. 
+ */ + using AddFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const Window &window); + /** Add function to use for the particular tensor types passed to configure() */ + AddFunction *_func; + ConvertPolicy _policy; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp index 49e503fac4..bdd356ad7f 100644 --- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h" +#include "src/core/NEON/kernels/NEArithmeticSubtractionKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.h b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.h new file mode 100644 index 0000000000..69952d6162 --- /dev/null +++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H +#define ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform subtraction between two tensors */ +class NEArithmeticSubtractionKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEArithmeticSubtractionKernel"; + } + /** Default constructor */ + NEArithmeticSubtractionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticSubtractionKernel(const NEArithmeticSubtractionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticSubtractionKernel &operator=(const NEArithmeticSubtractionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEArithmeticSubtractionKernel(NEArithmeticSubtractionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEArithmeticSubtractionKernel &operator=(NEArithmeticSubtractionKernel &&) = default; + /** Default destructor */ + ~NEArithmeticSubtractionKernel() = default; + + /** Initialise the kernel's input and output. 
+ * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (QASYMM8, QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED, QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (S16,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,S16) -> S16 + * - (S32,S32) -> S32 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * + * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 + * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 + * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32. + * @param[in] policy Overflow policy. Convert policy cannot be WRAP if datatype is quantized. + */ + void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy); + /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtractionKernel + * + * Valid configurations (Input1,Input2) -> Output : + * + * - (U8,U8) -> U8 + * - (U8,U8) -> S16 + * - (QASYMM8, QASYMM8) -> QASYMM8 + * - (QASYMM8_SIGNED, QASYMM8_SIGNED) -> QASYMM8_SIGNED + * - (S16,U8) -> S16 + * - (U8,S16) -> S16 + * - (S16,S16) -> S16 + * - (S32,S32) -> S32 + * - (F16,F16) -> F16 + * - (F32,F32) -> F32 + * + * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 + * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 + * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32. + * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialised sub functions + * + * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 + * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 + * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32. + * @param[in] window Region on which to execute the kernel. + * @param[in] is_sat Flag to indicate if the policy is SATURATE. + */ + using SubFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window, bool is_sat); + /** Sub function to use for the particular tensor types passed to configure() */ + SubFunction *_func; + ConvertPolicy _policy; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp index 65ac996f46..ddf69710f9 100644 --- a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h new file mode 100644 index 0000000000..b74a94805d --- /dev/null +++ b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H +#define ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the batch concatenate kernel. + * The input tensor will be concatenated into the output tensor. 
+ */ +class NEBatchConcatenateLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEBatchConcatenateLayerKernel"; + } + /** Default constructor */ + NEBatchConcatenateLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchConcatenateLayerKernel(const NEBatchConcatenateLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchConcatenateLayerKernel &operator=(const NEBatchConcatenateLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBatchConcatenateLayerKernel(NEBatchConcatenateLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBatchConcatenateLayerKernel &operator=(NEBatchConcatenateLayerKernel &&) = default; + /** Default destructor */ + ~NEBatchConcatenateLayerKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor info. Data types supported: All. + * @param[in] batch_offset The offset on axis # 3. + * @param[in,out] output Output tensor info. Data types supported: Same as @p input. + * + * @note: The output tensor's low two dimensions can't be smaller than the input one's. + * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. + * + */ + void configure(const ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEBatchConcatenateLayerKernel + * + * @param[in] input Input tensor info. Data types supported: All. + * @param[in] batch_offset The offset on axis # 3. + * @param[in] output Output tensor info. Data types supported: Same as @p input. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + +private: + using BatchConcatFunction = void(const ITensor *in, ITensor *out, unsigned int batch_offset, const Window &window); + +private: + BatchConcatFunction *_func; + unsigned int _batch_offset; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp index bda396662f..afb08e5d1c 100644 --- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" +#include "src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h new file mode 100644 index 0000000000..9312073ce8 --- /dev/null +++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H +#define ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the batch normalization layer kernel. 
+ */ +class NEBatchNormalizationLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEBatchNormalizationLayerKernel"; + } + /** Default constructor */ + NEBatchNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchNormalizationLayerKernel(const NEBatchNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchNormalizationLayerKernel &operator=(const NEBatchNormalizationLayerKernel &) = delete; + /** Default Move Constructor. */ + NEBatchNormalizationLayerKernel(NEBatchNormalizationLayerKernel &&) = default; + /** Default move assignment operator */ + NEBatchNormalizationLayerKernel &operator=(NEBatchNormalizationLayerKernel &&) = default; + /** Default destructor */ + ~NEBatchNormalizationLayerKernel() = default; + /** Set the input and output tensors. + * + * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place + * + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. + * 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F16/F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. 
Data types supported: Same as @p input + * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input + * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. + */ + void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta = nullptr, const ITensor *gamma = nullptr, float epsilon = 0.001f, + ActivationLayerInfo act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref NEBatchNormalizationLayerKernel + * + * @param[in] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result. + * 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F16/F32. + * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input + * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. 
Data types supported: Same as @p input + * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const ITensorInfo *mean, const ITensorInfo *var, + const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr, + float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Configure execution function in case of non-fused activation **/ + void configure_non_fused(); + /** Configure execution function in case of fused activation **/ + void configure_fused(); + + /** Template function to run batch normalization on fp32 + * + * @tparam T Specialization data type + * @tparam fused_activation Boolean that flags if its a fused activation or not + * @tparam F Activation function functor to run + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void batch_normalization_nchw(const Window &window); + /** Template function to run batch normalization on fp32 on tensors with NHWC format + * + * @tparam T Specialization data type + * @tparam fused_activation Boolean that flags if its a fused activation or not + * @tparam F Activation function functor to run + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void batch_normalization_nhwc(const Window &window); + /** Common signature for all the batch normalization functions + * + * @param[in] window Region on which to execute the kernel. 
+ */ + using BatchNormFunctionPtr = void (NEBatchNormalizationLayerKernel::*)(const Window &window); + +private: + BatchNormFunctionPtr _func; + ITensor *_input; + ITensor *_output; + const ITensor *_mean; + const ITensor *_var; + const ITensor *_gamma; + const ITensor *_beta; + float _epsilon; + ActivationLayerInfo _act_info; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp b/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp index e24d7b6c0a..10207b9cf6 100644 --- a/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h" +#include "src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h b/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h new file mode 100644 index 0000000000..26e8224922 --- /dev/null +++ b/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H +#define ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the batch to space kernel */ +class NEBatchToSpaceLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEBatchToSpaceLayerKernel"; + } + /** Default constructor */ + NEBatchToSpaceLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchToSpaceLayerKernel(const NEBatchToSpaceLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchToSpaceLayerKernel &operator=(const NEBatchToSpaceLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBatchToSpaceLayerKernel(NEBatchToSpaceLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBatchToSpaceLayerKernel &operator=(NEBatchToSpaceLayerKernel &&) = default; + /** Default destructor */ + ~NEBatchToSpaceLayerKernel() = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 + * @param[out] output Tensor output. 
Data types supported: same as @p input + */ + void configure(const ITensor *input, const ITensor *block_shape, ITensor *output); + /** Initialise the kernel's inputs and output (Static block shape). + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(const ITensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayerKernel + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 + * @param[in] output Tensor output. Data types supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayerKernel (Static block shape). + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. + * @param[in] output Tensor output. 
Data types supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const int32_t block_shape_x, const int32_t block_shape_y, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; /**< Source tensor */ + const ITensor *_block_shape; /**< Block shape tensor */ + ITensor *_output; /**< Destination tensor */ + DataLayout _data_layout; /**< Data layout to be used at run-time */ + + int32_t _block_shape_x; + int32_t _block_shape_y; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEBitwiseAndKernel.cpp b/src/core/NEON/kernels/NEBitwiseAndKernel.cpp index 2d49ff825e..4f4de70c3c 100644 --- a/src/core/NEON/kernels/NEBitwiseAndKernel.cpp +++ b/src/core/NEON/kernels/NEBitwiseAndKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h" +#include "src/core/NEON/kernels/NEBitwiseAndKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEBitwiseAndKernel.h b/src/core/NEON/kernels/NEBitwiseAndKernel.h new file mode 100644 index 0000000000..e4603f68f6 --- /dev/null +++ b/src/core/NEON/kernels/NEBitwiseAndKernel.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEBITWISEANDKERNEL_H +#define ARM_COMPUTE_NEBITWISEANDKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform bitwise AND between XY-planes of two tensors + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \land input2(x,y) @f] + */ +class NEBitwiseAndKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEBitwiseAndKernel"; + } + /** Default constructor */ + NEBitwiseAndKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseAndKernel(const NEBitwiseAndKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseAndKernel &operator=(const NEBitwiseAndKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBitwiseAndKernel(NEBitwiseAndKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBitwiseAndKernel &operator=(NEBitwiseAndKernel &&) = default; + /** Default destructor */ + ~NEBitwiseAndKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input1 An input tensor. Data type supported: U8. + * @param[in] input2 An input tensor. Data type supported: U8 + * @param[out] output Output tensor. Data type supported: U8. 
+ */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input1; /**< Source tensor 1 */ + const ITensor *_input2; /**< Source tensor 2 */ + ITensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEBITWISEANDKERNEL_H */ diff --git a/src/core/NEON/kernels/NEBitwiseNotKernel.cpp b/src/core/NEON/kernels/NEBitwiseNotKernel.cpp index eed9b273ae..c69c4ea046 100644 --- a/src/core/NEON/kernels/NEBitwiseNotKernel.cpp +++ b/src/core/NEON/kernels/NEBitwiseNotKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h" +#include "src/core/NEON/kernels/NEBitwiseNotKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEBitwiseNotKernel.h b/src/core/NEON/kernels/NEBitwiseNotKernel.h new file mode 100644 index 0000000000..ba47c38143 --- /dev/null +++ b/src/core/NEON/kernels/NEBitwiseNotKernel.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEBITWISENOTKERNEL_H +#define ARM_COMPUTE_NEBITWISENOTKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform bitwise NOT operation + * + * Result is computed by: + * @f[ output(x,y) = \lnot input(x,y) @f] + */ +class NEBitwiseNotKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEBitwiseNotKernel"; + } + /** Default constructor */ + NEBitwiseNotKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseNotKernel(const NEBitwiseNotKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseNotKernel &operator=(const NEBitwiseNotKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBitwiseNotKernel(NEBitwiseNotKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBitwiseNotKernel &operator=(NEBitwiseNotKernel &&) = default; + /** Default destructor */ + ~NEBitwiseNotKernel() = default; + /** Initialise the kernel's input and output + * + * @param[in] input An input tensor. Data type supported: U8. + * @param[out] output The output tensor. Data type supported: U8. 
+ */ + void configure(const ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; /**< Source tensor */ + ITensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEBITWISENOTKERNEL_H */ diff --git a/src/core/NEON/kernels/NEBitwiseOrKernel.cpp b/src/core/NEON/kernels/NEBitwiseOrKernel.cpp index f96117e860..875e6391a5 100644 --- a/src/core/NEON/kernels/NEBitwiseOrKernel.cpp +++ b/src/core/NEON/kernels/NEBitwiseOrKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h" +#include "src/core/NEON/kernels/NEBitwiseOrKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEBitwiseOrKernel.h b/src/core/NEON/kernels/NEBitwiseOrKernel.h new file mode 100644 index 0000000000..40ef757d60 --- /dev/null +++ b/src/core/NEON/kernels/NEBitwiseOrKernel.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEBITWISEORKERNEL_H +#define ARM_COMPUTE_NEBITWISEORKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform bitwise inclusive OR between two tensors + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \lor input2(x,y) @f] + */ +class NEBitwiseOrKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEBitwiseOrKernel"; + } + /** Default constructor */ + NEBitwiseOrKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseOrKernel(const NEBitwiseOrKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseOrKernel &operator=(const NEBitwiseOrKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBitwiseOrKernel(NEBitwiseOrKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBitwiseOrKernel &operator=(NEBitwiseOrKernel &&) = default; + /** Default destructor */ + ~NEBitwiseOrKernel() = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input1 An input tensor. Data type supported: U8. + * @param[in] input2 An input tensor. Data type supported: U8 + * @param[out] output Output tensor. Data type supported: U8. 
+ */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input1; /**< Source tensor 1 */ + const ITensor *_input2; /**< Source tensor 2 */ + ITensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEBITWISEORKERNEL_H */ diff --git a/src/core/NEON/kernels/NEBitwiseXorKernel.cpp b/src/core/NEON/kernels/NEBitwiseXorKernel.cpp index 45d2b0a0db..603b49d5eb 100644 --- a/src/core/NEON/kernels/NEBitwiseXorKernel.cpp +++ b/src/core/NEON/kernels/NEBitwiseXorKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h" +#include "src/core/NEON/kernels/NEBitwiseXorKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEBitwiseXorKernel.h b/src/core/NEON/kernels/NEBitwiseXorKernel.h new file mode 100644 index 0000000000..24d07a6e18 --- /dev/null +++ b/src/core/NEON/kernels/NEBitwiseXorKernel.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEBITWISEXORKERNEL_H +#define ARM_COMPUTE_NEBITWISEXORKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform bitwise exclusive OR (XOR) between two tensors + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \oplus input2(x,y) @f] + */ +class NEBitwiseXorKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEBitwiseXorKernel"; + } + /** Default constructor */ + NEBitwiseXorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseXorKernel(const NEBitwiseXorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseXorKernel &operator=(const NEBitwiseXorKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBitwiseXorKernel(NEBitwiseXorKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBitwiseXorKernel &operator=(NEBitwiseXorKernel &&) = default; + /** Default destructor */ + ~NEBitwiseXorKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] input1 An input tensor. Data type supported: U8. + * @param[in] input2 An input tensor. Data type supported: U8 + * @param[out] output The output tensor. Data type supported: U8. 
+ */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input1; /**< Source tensor 1 */ + const ITensor *_input2; /**< Source tensor 2 */ + ITensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEBITWISEXORKERNEL_H */ diff --git a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp index 5a18e88321..03d6e1c600 100644 --- a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp +++ b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h" +#include "src/core/NEON/kernels/NEBoundingBoxTransformKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" diff --git a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.h b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.h new file mode 100644 index 0000000000..c080ce6a5c --- /dev/null +++ b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H +#define ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the bounding box kernel */ +class NEBoundingBoxTransformKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEBoundingBoxTransformKernel"; + } + + /** Default constructor */ + NEBoundingBoxTransformKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBoundingBoxTransformKernel(const NEBoundingBoxTransformKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBoundingBoxTransformKernel &operator=(const NEBoundingBoxTransformKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBoundingBoxTransformKernel(NEBoundingBoxTransformKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBoundingBoxTransformKernel &operator=(NEBoundingBoxTransformKernel &&) = default; + /** Default destructor */ + ~NEBoundingBoxTransformKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. + * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. 
 Data types supported: Same as @p input + * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. + * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input. + * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. + * + * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. + * + */ + void configure(const ITensor *boxes, ITensor *pred_boxes, const ITensor *deltas, const BoundingBoxTransformInfo &info); + + /** Static function to check if given info will lead to a valid configuration of @ref NEBoundingBoxTransform + * + * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. + * @param[in] pred_boxes Destination tensor info. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input + * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. + * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input. + * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. + * + * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. 
+ * + * @return a Status + */ + static Status validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + template + void internal_run(const Window &window); + + const ITensor *_boxes; + ITensor *_pred_boxes; + const ITensor *_deltas; + BoundingBoxTransformInfo _bbinfo; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H */ diff --git a/src/core/NEON/kernels/NEBox3x3Kernel.cpp b/src/core/NEON/kernels/NEBox3x3Kernel.cpp index 1177f6f1dd..2aa8aa8e99 100644 --- a/src/core/NEON/kernels/NEBox3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEBox3x3Kernel.cpp @@ -21,14 +21,14 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEBox3x3Kernel.h" +#include "src/core/NEON/kernels/NEBox3x3Kernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEBox3x3Kernel.h b/src/core/NEON/kernels/NEBox3x3Kernel.h new file mode 100644 index 0000000000..f6a64a7bb4 --- /dev/null +++ b/src/core/NEON/kernels/NEBox3x3Kernel.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEBOX3x3KERNEL_H +#define ARM_COMPUTE_NEBOX3x3KERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a Box 3x3 filter */ +class NEBox3x3Kernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEBox3x3Kernel"; + } + /** Default constructor */ + NEBox3x3Kernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBox3x3Kernel(const NEBox3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBox3x3Kernel &operator=(const NEBox3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + NEBox3x3Kernel(NEBox3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + NEBox3x3Kernel &operator=(NEBox3x3Kernel &&) = default; + /** Default destructor */ + ~NEBox3x3Kernel() = default; + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; +}; + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +/** NEON kernel to perform a Box 3x3 filter for FP16 datatype + */ +class NEBox3x3FP16Kernel : public NEBox3x3Kernel +{ +public: + const char *name() const override + { + return "NEBox3x3FP16Kernel"; + } + /** Default constructor */ + NEBox3x3FP16Kernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBox3x3FP16Kernel(const NEBox3x3FP16Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBox3x3FP16Kernel &operator=(const NEBox3x3FP16Kernel &) = delete; + /** Allow instances of this class to be moved */ + NEBox3x3FP16Kernel(NEBox3x3FP16Kernel &&) = default; + /** Allow instances of this class to be moved */ + NEBox3x3FP16Kernel &operator=(NEBox3x3FP16Kernel &&) = default; + /** Default destructor */ + ~NEBox3x3FP16Kernel() = default; + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; +}; +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +/** NEON kernel to perform a Box 3x3 filter for FP16 datatype */ +using NEBox3x3FP16Kernel = NEBox3x3Kernel; +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEBOX3x3KERNEL_H */ diff --git a/src/core/NEON/kernels/NECannyEdgeKernel.cpp b/src/core/NEON/kernels/NECannyEdgeKernel.cpp index da33c1b1ea..7a2bf20c04 100644 --- a/src/core/NEON/kernels/NECannyEdgeKernel.cpp +++ b/src/core/NEON/kernels/NECannyEdgeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h" +#include "src/core/NEON/kernels/NECannyEdgeKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" @@ -41,22 +41,14 @@ #include #include -using namespace arm_compute; - namespace arm_compute { -class Coordinates; -} // namespace arm_compute - namespace { constexpr int NO_EDGE = 0; constexpr int EDGE = 255; constexpr int MAYBE = 127; -} // namespace -namespace -{ inline uint8x8_t phase_quantization(const float32x4x2_t &gx, const float32x4x2_t &gy) { // Constant use for evaluating score1 and score3 @@ -873,6 +865,8 @@ void edge_trace_U8_U8(uint8_t *__restrict input, uint8_t *__restrict output, con } } // namespace +NEGradientKernel::~NEGradientKernel() = default; + NEGradientKernel::NEGradientKernel() : _func(nullptr), _gx(nullptr), _gy(nullptr), _magnitude(nullptr), _phase(nullptr) { @@ -961,6 +955,7 @@ void NEGradientKernel::run(const Window &window, const ThreadInfo &info) gx, gy, magnitude, phase); } +NEEdgeNonMaxSuppressionKernel::~NEEdgeNonMaxSuppressionKernel() = default; NEEdgeNonMaxSuppressionKernel::NEEdgeNonMaxSuppressionKernel() : _func(nullptr), _magnitude(nullptr), _phase(nullptr), _output(nullptr), _lower_thr(0), _upper_thr(0) { @@ -1045,6 +1040,7 @@ void NEEdgeNonMaxSuppressionKernel::run(const Window &window, const ThreadInfo & magnitude, phase, output); } +NEEdgeTraceKernel::~NEEdgeTraceKernel() = default; NEEdgeTraceKernel::NEEdgeTraceKernel() : _input(nullptr), _output(nullptr) { @@ -1123,3 +1119,4 @@ void NEEdgeTraceKernel::run(const Window &window, const ThreadInfo &info) }, input, output); } +} // namespace arm_compute \ No newline at end of file diff --git a/src/core/NEON/kernels/NECannyEdgeKernel.h b/src/core/NEON/kernels/NECannyEdgeKernel.h new file mode 100644 index 0000000000..eff735259d --- /dev/null +++ b/src/core/NEON/kernels/NECannyEdgeKernel.h @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NECANNYEDGEKERNEL_H +#define ARM_COMPUTE_NECANNYEDGEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Computes magnitude and quantised phase from inputs gradients. 
*/ +class NEGradientKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEGradientKernel"; + } + /** Default constructor */ + NEGradientKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGradientKernel(const NEGradientKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGradientKernel &operator=(const NEGradientKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGradientKernel(NEGradientKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGradientKernel &operator=(NEGradientKernel &&) = default; + /** Default destructor */ + ~NEGradientKernel(); + + /** Initialise the kernel's sources, destinations and border mode. + * + * @note gx, gy and magnitude must all be the same size (either 16 or 32) + * + * @param[in] gx Source tensor - Gx component. Data type supported: S16/S32. + * @param[in] gy Source tensor - Gy component. Data type supported: same as @p gx. + * @param[out] magnitude Destination tensor - Magnitude. Data type supported: U16 (if the data type of @p gx is S16) / U32 (if the data type of @p gx is S32). + * @param[out] phase Destination tensor - Quantized phase. Data type supported: U8. + * @param[in] norm_type Normalization type. If 1, L1-Norm otherwise L2-Norm + */ + virtual void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +protected: + /** Common signature for all the specialised gradient functions + * + * @param[in] gx_ptr Pointer to the first input tensor. + * @param[in] gy_ptr Pointer to the second input tensor. 
+ * @param[out] magnitude_ptr Pointer to the first output tensor + * @param[out] phase_ptr Pointer to the second output tensor + */ + using GradientFunction = void(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr); + + GradientFunction *_func; /**< Gradient function to use for the particular tensor types passed to configure() */ + const ITensor *_gx; /**< Source tensor - Gx component */ + const ITensor *_gy; /**< Source tensor - Gy component */ + ITensor *_magnitude; /**< Destination tensor - Magnitude */ + ITensor *_phase; /**< Destination tensor - Quantized phase */ +}; + +/** NEON kernel to perform Non-Maxima suppression for Canny Edge. + * + * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input + * to characterize points as possible edges. Thus, at the end, each point will be set to EDGE, NO_EDGE or MAYBE. + * + * @note Hysteresis is computed in @ref NEEdgeTraceKernel + */ +class NEEdgeNonMaxSuppressionKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEEdgeNonMaxSuppressionKernel"; + } + /** Default constructor */ + NEEdgeNonMaxSuppressionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEdgeNonMaxSuppressionKernel(const NEEdgeNonMaxSuppressionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEdgeNonMaxSuppressionKernel &operator=(const NEEdgeNonMaxSuppressionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEEdgeNonMaxSuppressionKernel(NEEdgeNonMaxSuppressionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEEdgeNonMaxSuppressionKernel &operator=(NEEdgeNonMaxSuppressionKernel &&) = default; + /** Default destructor */ + ~NEEdgeNonMaxSuppressionKernel(); + + /** Initialise the kernel's sources, 
destination and border mode. + * + * @param[in] magnitude Source tensor - Magnitude. Data type supported: U16/U32. + * @param[in] phase Source tensor - Quantized phase. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. It will be filled with 0 for "no edge", 127 for "maybe", 255 for "edge" + * @param[in] upper_thr Upper threshold used for the hysteresis + * @param[in] lower_thr Lower threshold used for the hysteresis + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *magnitude, const ITensor *phase, ITensor *output, int32_t upper_thr, int32_t lower_thr, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + /** Common signature for all the specialised non-maxima suppression functions + * + * @param[in] magnitude_ptr Pointer to the first input tensor. + * @param[in] phase_ptr Pointer to the second input tensor. 
+ * @param[out] output_ptr Pointer to the output tensor + * @param[in] stride_mag Stride of the magnitude tensor + * @param[in] upper_thr Upper threshold used for the hysteresis + * @param[in] lower_thr Lower threshold used for the hysteresis + */ + using EdgeNonMaxSupprFunction = void(const void *__restrict magnitude_ptr, const void *__restrict phase_ptr, void *__restrict output_ptr, const uint32_t stride_mag, const int32_t upper_thr, + const int32_t lower_thr); + + EdgeNonMaxSupprFunction *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */ + const ITensor *_magnitude; /**< Source tensor - Magnitude */ + const ITensor *_phase; /**< Source tensor - Quantized phase */ + ITensor *_output; /**< Destination tensor */ + int32_t _lower_thr; /**< Lower threshold used for the hysteresis */ + int32_t _upper_thr; /**< Upper threshold used for the hysteresis */ +}; + +/** NEON kernel to perform Edge tracing */ +class NEEdgeTraceKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEEdgeTraceKernel"; + } + /** Default constructor */ + NEEdgeTraceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEdgeTraceKernel(const NEEdgeTraceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEdgeTraceKernel &operator=(const NEEdgeTraceKernel &) = delete; + /** Allow instances of this class to be moved */ + NEEdgeTraceKernel(NEEdgeTraceKernel &&) = default; + /** Allow instances of this class to be moved */ + NEEdgeTraceKernel &operator=(NEEdgeTraceKernel &&) = default; + /** Default destructor */ + ~NEEdgeTraceKernel(); + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in,out] input Source tensor. Data type supported: U8. Must contain 0 for "no edge", 127 for "maybe", 255 for "edge" + * @param[in,out] output Destination tensor. 
Data type supported: U8. Must be initialized to 0 (No edge). + */ + void configure(ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + bool is_parallelisable() const override; + +private: + ITensor *_input; /**< Source tensor */ + ITensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NECANNYEDGEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEChannelCombineKernel.cpp b/src/core/NEON/kernels/NEChannelCombineKernel.cpp index 7bd380831b..6bfd4c5bda 100644 --- a/src/core/NEON/kernels/NEChannelCombineKernel.cpp +++ b/src/core/NEON/kernels/NEChannelCombineKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEChannelCombineKernel.h" +#include "src/core/NEON/kernels/NEChannelCombineKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEChannelCombineKernel.h b/src/core/NEON/kernels/NEChannelCombineKernel.h new file mode 100644 index 0000000000..a3372be4d2 --- /dev/null +++ b/src/core/NEON/kernels/NEChannelCombineKernel.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H +#define ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +#include +#include + +namespace arm_compute +{ +class IMultiImage; +class ITensor; +using IImage = ITensor; + +/** Interface for the channel combine kernel */ +class NEChannelCombineKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEChannelCombineKernel"; + } + /** Default constructor */ + NEChannelCombineKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEChannelCombineKernel(const NEChannelCombineKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEChannelCombineKernel &operator=(const NEChannelCombineKernel &) = delete; + /** Allow instances of this class to be moved */ + NEChannelCombineKernel(NEChannelCombineKernel &&) = default; + /** Allow instances of this class to be moved */ + NEChannelCombineKernel &operator=(NEChannelCombineKernel &&) = default; + /** Default destructor */ + ~NEChannelCombineKernel() = default; + + /** Configure function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 + * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 + * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 + * @param[in] plane3 The 2D plane that forms channel 3. 
Data type supported: U8 + * @param[out] output The single planar output tensor. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 + */ + void configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output); + /** Configure function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 + * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 + * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 + * @param[out] output The multi planar output tensor. Formats supported: NV12/NV21/IYUV/YUV444 + */ + void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + bool is_parallelisable() const override; + +private: + /** Combine 3 planes to form a three channel single plane tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void combine_3C(const Window &win); + /** Combine 4 planes to form a four channel single plane tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void combine_4C(const Window &win); + /** Combine 3 planes to form a single plane YUV tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + template + void combine_YUV_1p(const Window &win); + /** Combine 3 planes to form a two plane YUV tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void combine_YUV_2p(const Window &win); + /** Combine 3 planes to form a three plane YUV tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void combine_YUV_3p(const Window &win); + /** Copies a full plane to the output tensor. + * + * @param[in] win Region on which to execute the kernel. 
+ */ + void copy_plane(const Window &win, uint32_t plane_id); + /** Common signature for all the specialised ChannelCombine functions + * + * @param[in] window Region on which to execute the kernel. + */ + using ChannelCombineFunction = void (NEChannelCombineKernel::*)(const Window &window); + /** ChannelCombine function to use for the particular tensor types passed to configure() */ + ChannelCombineFunction _func; + std::array _planes; + ITensor *_output; + IMultiImage *_output_multi; + std::array _x_subsampling; + std::array _y_subsampling; + unsigned int _num_elems_processed_per_iteration; + bool _is_parallelizable; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEChannelExtractKernel.cpp b/src/core/NEON/kernels/NEChannelExtractKernel.cpp index 86245acd05..d0d1c6852f 100644 --- a/src/core/NEON/kernels/NEChannelExtractKernel.cpp +++ b/src/core/NEON/kernels/NEChannelExtractKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEChannelExtractKernel.h" +#include "src/core/NEON/kernels/NEChannelExtractKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" @@ -29,11 +29,11 @@ #include "arm_compute/core/IMultiImage.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/MultiImageInfo.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEChannelExtractKernel.h b/src/core/NEON/kernels/NEChannelExtractKernel.h new file mode 100644 index 0000000000..0b2847d79c --- /dev/null +++ b/src/core/NEON/kernels/NEChannelExtractKernel.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H +#define ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class IMultiImage; +class ITensor; +using IImage = ITensor; + +/** Interface for the channel extract kernel */ +class NEChannelExtractKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEChannelExtractKernel"; + } + /** Default constructor */ + NEChannelExtractKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEChannelExtractKernel(const NEChannelExtractKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEChannelExtractKernel &operator=(const NEChannelExtractKernel &) = delete; + /** Allow instances of this class to be moved */ + NEChannelExtractKernel(NEChannelExtractKernel &&) = default; + /** Allow instances of this class to be moved */ + NEChannelExtractKernel &operator=(NEChannelExtractKernel &&) = default; + /** Default destructor */ + ~NEChannelExtractKernel() = default; + + /** Set the input and output of the kernel + * + * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 + * @param[in] channel Channel to extract. + * @param[out] output Destination tensor. Format supported: U8 + */ + void configure(const ITensor *input, Channel channel, ITensor *output); + /** Set the input and output of the kernel + * + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444 + * @param[in] channel Channel to extract. + * @param[out] output Single-planar destination image. 
Format supported: U8 + */ + void configure(const IMultiImage *input, Channel channel, IImage *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Extract one channel from a two channel planar tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void extract_1C_from_2C_img(const Window &win); + /** Extract one channel from a three channel planar tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void extract_1C_from_3C_img(const Window &win); + /** Extract one channel from a four channel planar tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void extract_1C_from_4C_img(const Window &win); + /** Extract U/V channel from a single planar YUVY/UYVY tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void extract_YUYV_uv(const Window &win); + /** Copies a full plane to the output tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void copy_plane(const Window &win); + /** Common signature for all the specialised ChannelExtract functions + * + * @param[in] window Region on which to execute the kernel. + */ + using ChannelExtractFunction = void (NEChannelExtractKernel::*)(const Window &window); + /** ChannelExtract function to use for the particular tensor types passed to configure() */ + ChannelExtractFunction _func; + unsigned int _lut_index; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H */ diff --git a/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp index 6d04d71534..6e16f24956 100644 --- a/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp +++ b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h" +#include "src/core/NEON/kernels/NEChannelShuffleLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEChannelShuffleLayerKernel.h b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.h new file mode 100644 index 0000000000..c7d09df08e --- /dev/null +++ b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H +#define ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the channel shuffle kernel */ +class NEChannelShuffleLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEChannelShuffleLayerKernel"; + } + /** Default constructor */ + NEChannelShuffleLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEChannelShuffleLayerKernel(const NEChannelShuffleLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEChannelShuffleLayerKernel &operator=(const NEChannelShuffleLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEChannelShuffleLayerKernel(NEChannelShuffleLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEChannelShuffleLayerKernel &operator=(NEChannelShuffleLayerKernel &&) = default; + /** Default destructor */ + ~NEChannelShuffleLayerKernel() = default; + /** Configure function's inputs and outputs. + * + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor. Data type supported: Same as @p input + * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. + */ + void configure(const ITensor *input, ITensor *output, unsigned int num_groups); + /** Static function to check if given info will lead to a valid configuration of @ref NEChannelShuffleLayerKernel + * + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor. Data type supported: Same as @p input + * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; + unsigned int _num_groups; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NECol2ImKernel.cpp b/src/core/NEON/kernels/NECol2ImKernel.cpp index f3192370a6..97b68d1321 100644 --- a/src/core/NEON/kernels/NECol2ImKernel.cpp +++ b/src/core/NEON/kernels/NECol2ImKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" +#include "src/core/NEON/kernels/NECol2ImKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NECol2ImKernel.h b/src/core/NEON/kernels/NECol2ImKernel.h new file mode 100644 index 0000000000..59d1d741b6 --- /dev/null +++ b/src/core/NEON/kernels/NECol2ImKernel.h @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NECOL2IMKERNEL_H +#define ARM_COMPUTE_NECOL2IMKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +#include "arm_compute/core/Size2D.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform col2im reshaping. + * + * Rearranges each matrix column into image blocks. It's the inverse operation of @ref NEIm2ColKernel. + * + * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3: + * + * @f[ + * \left( \begin{array}{ccccccccc} + * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccc} + * a0 & a1 & a2 \\ + * a3 & a4 & a5 \\ + * a6 & a7 & a8 \\ + * \end{array} \right) + * @f] + */ +class NECol2ImKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NECol2ImKernel"; + } + /** Default constructor */ + NECol2ImKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECol2ImKernel(const NECol2ImKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECol2ImKernel &operator=(const NECol2ImKernel &) = delete; + /** Allow instances of this class to be moved */ + NECol2ImKernel(NECol2ImKernel &&) = default; + /** Allow instances of this class to be moved */ + NECol2ImKernel &operator=(NECol2ImKernel &&) = default; + /** Default destructor */ + ~NECol2ImKernel() = default; + + /** Set the input and output of the kernel. 
+ * + * @param[in] input The input tensor to convert. Data types supported: All + * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], + * while the rest represent batch of outputs. Data types supported: Same as @p input + * @param[in] convolved_dims Output convolved dimensions. + */ + void configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims); + /** Static function to check if given info will lead to a valid configuration of @ref NECol2ImKernel + * + * @param[in] input The input tensor to convert. Data types supported: All + * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], + * while the rest represent batch of outputs. Data types supported: Same as @p input + * @param[in] convolved_dims Output convolved dimensions. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Template function to run the col2im + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void run_col2im(const Window &window); + + /** Common signature for all the specialised col2im functions + * + * @param[in] window Region on which to execute the kernel. 
+ */ + using Col2ImFunctionPtr = void (NECol2ImKernel::*)(const Window &window); + + Col2ImFunctionPtr _func; + const ITensor *_input; + ITensor *_output; + Size2D _convolved_dims; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NECOL2IMKERNEL_H */ diff --git a/src/core/NEON/kernels/NEColorConvertKernel.cpp b/src/core/NEON/kernels/NEColorConvertKernel.cpp index f933a2a898..23270d42d1 100644 --- a/src/core/NEON/kernels/NEColorConvertKernel.cpp +++ b/src/core/NEON/kernels/NEColorConvertKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEColorConvertKernel.h" +#include "src/core/NEON/kernels/NEColorConvertKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEColorConvertKernel.h b/src/core/NEON/kernels/NEColorConvertKernel.h new file mode 100644 index 0000000000..1adb624aae --- /dev/null +++ b/src/core/NEON/kernels/NEColorConvertKernel.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_COLORCONVERTKERNEL_H +#define ARM_COMPUTE_COLORCONVERTKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class IMultiImage; +class ITensor; +using IImage = ITensor; + +/** Interface for the color convert kernel */ +class NEColorConvertKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEColorConvertKernel"; + } + /** Default constructor */ + NEColorConvertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEColorConvertKernel(const NEColorConvertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEColorConvertKernel &operator=(const NEColorConvertKernel &) = delete; + /** Allow instances of this class to be moved */ + NEColorConvertKernel(NEColorConvertKernel &&) = default; + /** Allow instances of this class to be moved */ + NEColorConvertKernel &operator=(NEColorConvertKernel &&) = default; + /** Default destructor */ + ~NEColorConvertKernel() = default; + + /** Set the input and output of the kernel + * + * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 + * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), + * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/), + * U8 (if the formats of @p input is RGB888) + */ + void configure(const ITensor *input, ITensor *output); + /** Set the input and output of the kernel + * + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV + * @param[out] output Single-planar destination image. 
Formats supported: RGB888/RGBA8888 + */ + void configure(const IMultiImage *input, IImage *output); + /** Set the input and output of the kernel + * + * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 + * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888) + */ + void configure(const IImage *input, IMultiImage *output); + /** Set the input and output of the kernel + * + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV + * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) + */ + void configure(const IMultiImage *input, IMultiImage *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + using ColorConvertFunction = void(const void *__restrict input_ptr, void *__restrict output_ptr, const Window &win); + const void *_input; + void *_output; + ColorConvertFunction *_func; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NECOLORCONVERTKERNEL_H */ diff --git a/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp b/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp index 8716cfd9b5..597c283a9c 100644 --- a/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp +++ b/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" +#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Types.h" diff --git a/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h b/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h new file mode 100644 index 0000000000..766ee8858a --- /dev/null +++ b/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H +#define ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface to convert the 2D Fully Connected weights from NCHW to NHWC or vice versa. + * + * @note This function can be applied to the 2D weights used by a Fully Connected layer if: + * - It follows a Convolution layer + * - The data layout used by the network does not match the one the model has been trained in. + * + * @note This function assumes the weights are already reshaped (transposed) + */ +class NEConvertFullyConnectedWeightsKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEConvertFullyConnectedWeightsKernel"; + } + /** Default constructor */ + NEConvertFullyConnectedWeightsKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvertFullyConnectedWeightsKernel(const NEConvertFullyConnectedWeightsKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvertFullyConnectedWeightsKernel &operator=(const NEConvertFullyConnectedWeightsKernel &) = delete; + /** Allow instances of this class to be moved */ + NEConvertFullyConnectedWeightsKernel(NEConvertFullyConnectedWeightsKernel &&) = default; + /** Allow instances of this class to be moved */ + NEConvertFullyConnectedWeightsKernel &operator=(NEConvertFullyConnectedWeightsKernel &&) = default; + /** Default destructor */ + ~NEConvertFullyConnectedWeightsKernel() = default; + /** Set the input and output tensor. + * + * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. + * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. + * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). 
+ * @param[in] data_layout The data layout the weights have been trained in. + */ + void configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout); + /** Static function to check if given info will lead to a valid configuration of @ref NEConvertFullyConnectedWeightsKernel + * + * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All. + * @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input. + * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). + * @param[in] data_layout The data layout the weights have been trained in. + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Template function to run the permute + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). 
+ */ + template + void run_convert_fc_weights(const Window &window); + + const ITensor *_input; + ITensor *_output; + unsigned int _factor1; /* equals to the number of elements per original input plane if @p data_layout == NCHW; its number of channels otherwise */ + unsigned int _factor2; /* equals to the number of elements per original input plane if @p data_layout == NHWC; its number of channels otherwise */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H */ diff --git a/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp b/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp index bd8ea30fb3..1f2170f42a 100644 --- a/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp +++ b/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h b/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h new file mode 100644 index 0000000000..2f80361ba5 --- /dev/null +++ b/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H +#define ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** NEON kernel to convert asymmetric signed to asymmetric signed and vice-versa */ +class NEConvertQuantizedSignednessKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEConvertQuantizedSignednessKernel"; + } + /** Default constructor */ + NEConvertQuantizedSignednessKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NEConvertQuantizedSignednessKernel(const NEConvertQuantizedSignednessKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ + NEConvertQuantizedSignednessKernel &operator=(const NEConvertQuantizedSignednessKernel &) = delete; + /** Allow instances of this class to be moved */ + NEConvertQuantizedSignednessKernel(NEConvertQuantizedSignednessKernel &&) = default; + /** Allow instances of this class to be moved */ + NEConvertQuantizedSignednessKernel &operator=(NEConvertQuantizedSignednessKernel &&) = default; + /** Default destructor */ + ~NEConvertQuantizedSignednessKernel() = default; + /** Initialize the kernel's input, output. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. Data types supported: opposite of @p input. + */ + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NECopyKernel + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED. + * @param[in] output Destination tensor. Data types supported: opposite of @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H */ diff --git a/src/core/NEON/kernels/NEConvolutionKernel.cpp b/src/core/NEON/kernels/NEConvolutionKernel.cpp index 69b65b2816..bac27430f9 100644 --- a/src/core/NEON/kernels/NEConvolutionKernel.cpp +++ b/src/core/NEON/kernels/NEConvolutionKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h" +#include "src/core/NEON/kernels/NEConvolutionKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NEConvolutionKernel.h b/src/core/NEON/kernels/NEConvolutionKernel.h new file mode 100644 index 0000000000..b8bf1d169e --- /dev/null +++ b/src/core/NEON/kernels/NEConvolutionKernel.h @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NECONVOLUTIONKERNEL_H +#define ARM_COMPUTE_NECONVOLUTIONKERNEL_H + +#include "src/core/NEON/INEKernel.h" +#include "src/core/NEON/INESimpleKernel.h" + +#include +#include +#include + +namespace arm_compute +{ +class ITensor; + +/****************************************************************************************\ + * Square Convolution * +\****************************************************************************************/ + +/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9). + * The client can supply a convolution matrix \f$ C_{m,n} \f$. + * @f{eqnarray}{ + * k_0 &=& \frac{m}{2} \\ + * l_0 &=& \frac{n}{2} \\ + * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l} + * @f} + * + * @note The above equation for this function is similar to the default OpenCV Filter2D function, + * which actually computes a correlation and not a convolution. + * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically. + */ +template +class NEConvolutionKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEConvolutionKernel"; + } + /** Default constructor */ + NEConvolutionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NEConvolutionKernel(const NEConvolutionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NEConvolutionKernel &operator=(const NEConvolutionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEConvolutionKernel(NEConvolutionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEConvolutionKernel &operator=(NEConvolutionKernel &&) = default; + /** Default destructor */ + ~NEConvolutionKernel() = default; + /** Initialise the kernel's input, output and border mode. 
+ * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + template + void convolution(const Window &win); + +protected: + uint32_t _scale; /**< scale of the convolution */ + std::array _convolution; /**< convolution matrix */ +}; + +/** Interface for the kernel which applied a 3x3 convolution to a tensor.*/ +using NEConvolution3x3Kernel = NEConvolutionKernel<3>; +/** Interface for the kernel which applied a 5x5 convolution to a tensor.*/ +using NEConvolution5x5Kernel = NEConvolutionKernel<5>; +/** Interface for the kernel which applied a 7x7 convolution to a tensor.*/ +using NEConvolution7x7Kernel = NEConvolutionKernel<7>; +///** Interface for the kernel which applied a 9x9 convolution to a tensor.*/ +using NEConvolution9x9Kernel = NEConvolutionKernel<9>; + +/****************************************************************************************\ + * Separable Square Convolution * +\****************************************************************************************/ + +/** Kernel for the Horizontal pass of a Separable Convolution */ +template +class NESeparableConvolutionHorKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NESeparableConvolutionHorKernel"; + } + /** Default constructor */ + NESeparableConvolutionHorKernel(); + 
/** Prevent instances of this class from being copied (As this class contains pointers). */ + NESeparableConvolutionHorKernel(const NESeparableConvolutionHorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NESeparableConvolutionHorKernel &operator=(const NESeparableConvolutionHorKernel &) = delete; + /** Allow instances of this class to be moved */ + NESeparableConvolutionHorKernel(NESeparableConvolutionHorKernel &&) = default; + /** Allow instances of this class to be moved */ + NESeparableConvolutionHorKernel &operator=(NESeparableConvolutionHorKernel &&) = default; + /** Default destructor */ + ~NESeparableConvolutionHorKernel() = default; + + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data types supported: U16, S16, S32. + * @param[in] conv_row Convolution matrix to apply to the input tensor. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, const int16_t *conv_row, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + /** Apply the object's convolution to the given window of the input tensor.. + * + * @param[in] window Window to apply the convolution on. 
+ */ + template + void convolve(const Window &window); + + std::array _conv_row; /**< Convolution coefficients */ + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel which applied a 5x1 horizontal convolution to a tensor.*/ +using NESeparableConvolution5x5HorKernel = NESeparableConvolutionHorKernel<5>; +/** Interface for the kernel which applied a 7x1 horizontal convolution to a tensor.*/ +using NESeparableConvolution7x7HorKernel = NESeparableConvolutionHorKernel<7>; +/** Interface for the kernel which applied a 9x1 horizontal convolution to a tensor.*/ +using NESeparableConvolution9x9HorKernel = NESeparableConvolutionHorKernel<9>; + +/** Kernel for the Vertical pass of a Separable Convolution */ +template +class NESeparableConvolutionVertKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NESeparableConvolutionVertKernel"; + } + /** Default constructor */ + NESeparableConvolutionVertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NESeparableConvolutionVertKernel(const NESeparableConvolutionVertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NESeparableConvolutionVertKernel &operator=(const NESeparableConvolutionVertKernel &) = delete; + /** Allow instances of this class to be moved */ + NESeparableConvolutionVertKernel(NESeparableConvolutionVertKernel &&) = default; + /** Allow instances of this class to be moved */ + NESeparableConvolutionVertKernel &operator=(NESeparableConvolutionVertKernel &&) = default; + /** Default destructor */ + ~NESeparableConvolutionVertKernel() = default; + + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data type supported: U16, S16, S32. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv_col Convolution matrix to apply to the input tensor. 
+ * @param[in] scale Scale of the convolution matrix + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, const int16_t *conv_col, uint32_t scale, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + /** Apply the object's convolution to the given window of the input tensor. + * This function is used if the intermediate values have been stored as U16. + * + * @param[in] win Window to apply the convolution on. + */ + template + void convolution_u16(const Window &win); + /** Apply the object's convolution to the given window of the input tensor. + * This function is used if the intermediate values have been stored as S16. + * + * @param[in] win Window to apply the convolution on. + */ + template + void convolution_s16(const Window &win); + /** Apply the object's convolution to the given window of the input tensor. + * This function is used if the intermediate values have been stored as S32. + * + * @param[in] win Window to apply the convolution on. 
+ */ + template + void convolution_s32(const Window &win); + + std::array _conv_col; /**< Convolution coefficients */ + uint32_t _scale; /**< Convolution's scale */ +}; + +/** Interface for the kernel which applied a 1x5 vertical convolution to a tensor.*/ +using NESeparableConvolution5x5VertKernel = NESeparableConvolutionVertKernel<5>; +/** Interface for the kernel which applied a 1x7 vertical convolution to a tensor.*/ +using NESeparableConvolution7x7VertKernel = NESeparableConvolutionVertKernel<7>; +/** Interface for the kernel which applied a 1x9 vertical convolution to a tensor.*/ +using NESeparableConvolution9x9VertKernel = NESeparableConvolutionVertKernel<9>; + +/****************************************************************************************\ + * Rectangle Convolution * +\****************************************************************************************/ + +/** Kernel for the running convolution on a rectangle matrix. + * + * @note Supports combinations of 3,5,7 and 9. + */ +class NEConvolutionRectangleKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEConvolutionRectangleKernel"; + } + /** Default constructor */ + NEConvolutionRectangleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &) = delete; + /** Allow instances of this class to be moved */ + NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &&) = default; + /** Allow instances of this class to be moved */ + NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &&) = default; + /** Default destructor */ + ~NEConvolutionRectangleKernel() = default; + /** Initialise the kernel's input, output and border mode. 
+ * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] width Width of convolution matrix (Number of columns) + * @param[in] height Height of convolution matrix (Number of rows) + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + unsigned int get_index(uint32_t val); + /** Apply the object's convolution to the given window of the input tensor. + * + * @param[in] win Window to apply the convolution on. 
+ */ + template + void convolution(const Window &win); + +protected: + const ITensor *_input; /**< Input tensor */ + ITensor *_output; /**< Output tensor */ + uint32_t _scale; /**< Scale of the convolution */ + std::vector _convolution; /**< Convolution matrix */ + BorderSize _border_size; /**< Calculated border width */ + uint32_t _func_idx; /**< Index used to specify convolution function to be used */ + const static unsigned int _nr_supported_sizes + { + 4 + }; /**< Number of supported permutations */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NECONVOLUTIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NECopyKernel.cpp b/src/core/NEON/kernels/NECopyKernel.cpp index b299957b57..337c44c8eb 100644 --- a/src/core/NEON/kernels/NECopyKernel.cpp +++ b/src/core/NEON/kernels/NECopyKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" +#include "src/core/NEON/kernels/NECopyKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NECopyKernel.h b/src/core/NEON/kernels/NECopyKernel.h new file mode 100644 index 0000000000..62b7b803be --- /dev/null +++ b/src/core/NEON/kernels/NECopyKernel.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NECOPYKERNEL_H +#define ARM_COMPUTE_NECOPYKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a copy between two tensors */ +class NECopyKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NECopyKernel"; + } + /** Default constructor */ + NECopyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NECopyKernel(const NECopyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NECopyKernel &operator=(const NECopyKernel &) = delete; + /** Allow instances of this class to be moved */ + NECopyKernel(NECopyKernel &&) = default; + /** Allow instances of this class to be moved */ + NECopyKernel &operator=(NECopyKernel &&) = default; + /** Default destructor */ + ~NECopyKernel() = default; + /** Initialize the kernel's input, output. + * + * @param[in] input Source tensor. Data types supported: All + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] padding (Optional) Padding to be applied to the input tensor + */ + void configure(const ITensor *input, ITensor *output, const PaddingList &padding = PaddingList()); + /** Static function to check if given info will lead to a valid configuration of @ref NECopyKernel + * + * @param[in] input Source tensor. 
Data types supported: All + * @param[in] output Destination tensor. Data types supported: same as @p input. + * @param[in] padding (Optional) Padding to be applied to the input tensor + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding = PaddingList()); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; + PaddingList _padding; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NECOPYKERNEL_H */ diff --git a/src/core/NEON/kernels/NECropKernel.cpp b/src/core/NEON/kernels/NECropKernel.cpp index 5fb55d95a9..c94cdaed22 100644 --- a/src/core/NEON/kernels/NECropKernel.cpp +++ b/src/core/NEON/kernels/NECropKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NECropKernel.h" +#include "src/core/NEON/kernels/NECropKernel.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NECropKernel.h b/src/core/NEON/kernels/NECropKernel.h new file mode 100644 index 0000000000..742215e22b --- /dev/null +++ b/src/core/NEON/kernels/NECropKernel.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEON_CROP_KERNEL_H +#define ARM_COMPUTE_NEON_CROP_KERNEL_H + +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the kernel to perform tensor cropping */ +class NECropKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NECropKernel"; + } + /** Default constructor */ + NECropKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECropKernel(const NECropKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECropKernel &operator=(const NECropKernel &) = delete; + /** Allow instances of this class to be moved */ + NECropKernel(NECropKernel &&) = default; + /** Allow instances of this class to be moved */ + NECropKernel &operator=(NECropKernel &&) = default; + /** Default destructor */ + ~NECropKernel() = default; + /** Configure kernel + * + * @note Supported tensor rank: up to 4 + * @note Padding not supported. + * + * @param[in] input Source tensor. Data type supported: U8/U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC. + * @param[in] crop_boxes Tensor containing all possible boxes used to crop the image, each represented by 4 normalized values. + * Data type supported: F32 + * @param[in] box_ind One dimensional tensor mapping the @p crop_box_ind to the index of the 3D image in @p input. + * Data type supported: F32 + * @param[out] output Destination tensor. Data type supported: F32 + * @param[in] crop_box_ind Index of the crop box to be used from @p crop_boxes. Default is 0. + * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. 
+ */ + void configure(const ITensor *input, const ITensor *crop_boxes, const ITensor *box_ind, ITensor *output, uint32_t crop_box_ind = 0, float extrapolation_value = 0); + + /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel + * + * @note Supported tensor rank: up to 4 + * @note Padding not supported. + * + * @param[in] input Source tensor info. Data type supported: U8/U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC. + * @param[in] crop_boxes Tensor info for tensor containing all possible boxes used to crop the image. Data type supported: F32 + * @param[in] box_ind Tensor info for the one dimensional tensor mapping the @p crop_box_ind to the index of the 3D image + * in @p input. Data type supported: F32 + * @param[in] output Destination tensor. Data type supported: F32 + * @param[in] crop_box_ind Index of the crop box to be used from @p crop_boxes. Default is 0. + * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *crop_boxes, const ITensorInfo *box_ind, const ITensorInfo *output, uint32_t crop_box_ind = 0, float extrapolation_value = 0); + + /** Configure output tensor's shape as this can only be determined at runtime. */ + void configure_output_shape(); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + + /** Function to use for in bounds crop for the particular tensor types passed to configure() */ + using InBoundsCropFunction = void(const ITensor *, const ITensor *, float *, Coordinates, int32_t, int32_t, int32_t, bool, bool); + +private: + const ITensor *_input; + const ITensor *_crop_boxes; + const ITensor *_box_ind; + ITensor *_output; + + Coordinates _start; + Coordinates _end; + uint32_t _crop_box_ind; + float _extrapolation_value; + /** The number of rows out of bounds at the start and end of output. 
 */ +    std::array<uint32_t, 2> _rows_out_of_bounds; +    /** The number of columns out of bounds at the start and end of output. */ +    std::array<uint32_t, 2> _cols_out_of_bounds; + +    NECropKernel::InBoundsCropFunction *_in_bounds_crop_function; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEON_CROP_KERNEL_H */ diff --git a/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp b/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp index 5628802783..58a9a2f1fb 100644 --- a/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp +++ b/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h" +#include "src/core/NEON/kernels/NECumulativeDistributionKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NECumulativeDistributionKernel.h b/src/core/NEON/kernels/NECumulativeDistributionKernel.h new file mode 100644 index 0000000000..1f8c65b5fa --- /dev/null +++ b/src/core/NEON/kernels/NECumulativeDistributionKernel.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H +#define ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +#include <cstdint> + +namespace arm_compute +{ +class IDistribution1D; +class ILut; +class ITensor; +using IImage = ITensor; + +/** Interface for the cumulative distribution (cumulative summation) calculation kernel. + * + * This kernel calculates the cumulative sum of a given distribution (meaning that each output element + * is the sum of all its previous elements including itself) and creates a lookup table with the normalized + * pixel intensities which is used to improve the contrast of the image. 
+ */ +class NECumulativeDistributionKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NECumulativeDistributionKernel"; + } + /** Default constructor */ + NECumulativeDistributionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECumulativeDistributionKernel(const NECumulativeDistributionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECumulativeDistributionKernel &operator=(const NECumulativeDistributionKernel &) = delete; + /** Allow instances of this class to be moved */ + NECumulativeDistributionKernel(NECumulativeDistributionKernel &&) = default; + /** Allow instances of this class to be moved */ + NECumulativeDistributionKernel &operator=(NECumulativeDistributionKernel &&) = default; + /** Default destructor */ + ~NECumulativeDistributionKernel() = default; + /** Set the input and output distribution. + * + * @param[in] input Input image. Data type supported: U8 + * @param[in] distribution Unnormalized 256-bin distribution of the input image. + * @param[out] cumulative_sum Cummulative distribution (Summed histogram). Should be same size as @p distribution. + * @param[out] output Equalization lookup table. Should consist of 256 entries of U8 elements. + */ + void configure(const IImage *input, const IDistribution1D *distribution, IDistribution1D *cumulative_sum, ILut *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + bool is_parallelisable() const override; + +private: + const IImage *_input; /**< Input image. */ + const IDistribution1D *_distribution; /**< Input histogram of the input image. */ + IDistribution1D *_cumulative_sum; /**< The cummulative distribution. */ + ILut *_output; /**< Output with the equalization lookup table. */ +private: + static const uint32_t _histogram_size = 256; /**< Default histogram size of 256. 
*/ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp index b500268477..ba90bfcd4f 100644 --- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h new file mode 100644 index 0000000000..02c5479f93 --- /dev/null +++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H +#define ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the depth concatenate kernel. + * The input tensor will be concatenated into the output tensor. + */ +class NEDepthConcatenateLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEDepthConcatenateLayerKernel"; + } + /** Default constructor */ + NEDepthConcatenateLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthConcatenateLayerKernel(const NEDepthConcatenateLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthConcatenateLayerKernel &operator=(const NEDepthConcatenateLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEDepthConcatenateLayerKernel(NEDepthConcatenateLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEDepthConcatenateLayerKernel &operator=(NEDepthConcatenateLayerKernel &&) = default; + /** Default destructor */ + ~NEDepthConcatenateLayerKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] depth_offset The offset on the Z axis. + * @param[in,out] output Output tensor info. Data types supported: Same as @p input. + * + * @note: The output tensor's low two dimensions can't be smaller than the input one's. 
+ * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. + * + */ + void configure(const ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConcatenateLayerKernel + * + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] depth_offset The offset on the Z axis. + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + +private: + using DepthConcatFunction = void(const ITensor *in, ITensor *out, unsigned int depth_offset, const Window &window); + +private: + DepthConcatFunction *_func; + unsigned int _depth_offset; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp index 259ece7c6f..d6c89a4553 100644 --- a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h" +#include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEDepthConvertLayerKernel.h b/src/core/NEON/kernels/NEDepthConvertLayerKernel.h new file mode 100644 index 0000000000..30fe1ed2e6 --- /dev/null +++ b/src/core/NEON/kernels/NEDepthConvertLayerKernel.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_DEPTHCONVERTKERNEL_H +#define ARM_COMPUTE_DEPTHCONVERTKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Depth conversion kernel + * This function ignores the scale and zeroPoint of quantized tensors, i.e. QASYMM8 input is treated as uint8 values. 
+ */ +class NEDepthConvertLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEDepthConvertLayerKernel"; + } + /** Default constructor*/ + NEDepthConvertLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthConvertLayerKernel(const NEDepthConvertLayerKernel &) = delete; + /** Default move constructor */ + NEDepthConvertLayerKernel(NEDepthConvertLayerKernel &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthConvertLayerKernel &operator=(const NEDepthConvertLayerKernel &) = delete; + /** Default move assignment operator */ + NEDepthConvertLayerKernel &operator=(NEDepthConvertLayerKernel &&) = default; + /** Default destructor */ + ~NEDepthConvertLayerKernel() = default; + /** Set the input and output of the kernel + * + * Valid conversions Input -> Output : + * + * - QASYMM8_SIGNED -> S16, S32, F32, F16 + * - QASYMM8 -> U16, S16, S32, F32, F16 + * - U8 -> U16, S16, S32, F32, F16 + * - U16 -> U8, U32 + * - S16 -> QASYMM8_SIGNED, U8, S32 + * - BFLOAT16 -> F32 + * - F16 -> QASYMM8_SIGNED, QASYMM8, F32, S32, U8 + * - S32 -> QASYMM8_SIGNED, QASYMM8, F16, F32, U8 + * - F32 -> QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8 + * + * @param[in] input The input tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/BFLOAT16/F16/F32. + * @param[out] output The output tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/BFLOAT16/F16/F32. + * @param[in] policy Conversion policy. + * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8. + */ + void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0); + /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConvertLayerKernel + * + * @param[in] input Source tensor info. 
Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/BFLOAT16/F16/F32. + * @param[in] output Destination tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/BFLOAT16/F16/F32. + * @param[in] policy Conversion policy + * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; + ConvertPolicy _policy; + uint32_t _shift; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_DEPTHCONVERTKERNEL_H */ diff --git a/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp b/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp index 403e7aac9f..6dcc85ec2e 100644 --- a/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h" +#include "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h b/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h new file mode 100644 index 0000000000..7e18dd88b8 --- /dev/null +++ b/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H +#define ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the depth to space kernel */ +class NEDepthToSpaceLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEDepthToSpaceLayerKernel"; + } + /** Default constructor */ + NEDepthToSpaceLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthToSpaceLayerKernel(const NEDepthToSpaceLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthToSpaceLayerKernel &operator=(const NEDepthToSpaceLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEDepthToSpaceLayerKernel(NEDepthToSpaceLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEDepthToSpaceLayerKernel &operator=(NEDepthToSpaceLayerKernel &&) = default; + /** Default destructor */ + ~NEDepthToSpaceLayerKernel() = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All + * @param[out] output Tensor output. Data types supported: same as @p input + * @param[in] block_shape Block shape x value. + */ + void configure(const ITensor *input, ITensor *output, int32_t block_shape); + /** Static function to check if given info will lead to a valid configuration of @ref NEDepthToSpaceLayerKernel. + * + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All + * @param[in] output Tensor output info. Data types supported: same as @p input + * @param[in] block_shape Block shape value. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; /**< Source tensor */ + ITensor *_output; /**< Destination tensor */ + int32_t _block_shape; /**< Block shape */ + DataLayout _data_layout; /**< Data layout of the operation */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp index 533b374594..6e5322cbc6 100644 --- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h" +#include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h new file mode 100644 index 0000000000..713cdcd9d9 --- /dev/null +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H +#define ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H + +#include "arm_compute/core/utils/misc/Traits.h" +#include "src/core/NEON/INEKernel.h" +#include "support/Requires.h" + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#include <arm_neon.h> +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the kernel to run a depthwise convolution native on a tensor. 
*/ +class NEDepthwiseConvolutionLayerNativeKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEDepthwiseConvolutionLayerNativeKernel"; + } + /** Default constructor */ + NEDepthwiseConvolutionLayerNativeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthwiseConvolutionLayerNativeKernel(const NEDepthwiseConvolutionLayerNativeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthwiseConvolutionLayerNativeKernel &operator=(const NEDepthwiseConvolutionLayerNativeKernel &) = delete; + /** Default Move Constructor. */ + NEDepthwiseConvolutionLayerNativeKernel(NEDepthwiseConvolutionLayerNativeKernel &&) = default; + /** Default move assignment operator */ + NEDepthwiseConvolutionLayerNativeKernel &operator=(NEDepthwiseConvolutionLayerNativeKernel &&) = default; + /** Default destructor */ + ~NEDepthwiseConvolutionLayerNativeKernel() = default; + /** Initialize the function's source, destination and parameters. + * + * @note Supported data layouts: NHWC + * + * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [IFM, W, H]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. + * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. 
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * + */ + void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, + const Size2D &dilation = Size2D(1U, 1U)); + /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerNativeKernel + * + * @note Supported data layouts: NHWC + * + * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor info. This is a 3D tensor with dimensions [IFM, W, H]. + * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. + * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. + * @param[in] output Destination tensor info. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, + const Size2D &dilation = Size2D(1U, 1U)); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + template <typename T> + using FloatEnalber = typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, int>::type; + + template <typename T, typename TW, FloatEnalber<T> = 0> + void run_depthwise(const Window &window, bool has_biases); + + template <typename T> + using Quantized8bitEnalber = typename std::enable_if < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int >::type; + + template <typename T, typename TW, Quantized8bitEnalber<T> = 0> + void run_depthwise(const Window &window, bool has_biases); + + /** Common signature for all the specialised depthwise convolution native functions + * + * @param[in] window Region on which to execute the kernel. + */ + using DepthwiseFunctionPtr = void (NEDepthwiseConvolutionLayerNativeKernel::*)(const Window &window, bool has_biases); + + DepthwiseFunctionPtr _func; + const ITensor *_input; + const ITensor *_weights; + const ITensor *_biases; + ITensor *_output; + PadStrideInfo _conv_info; + unsigned int _depth_multiplier; + Size2D _dilation; + std::vector<int32_t> _output_multiplier; + std::vector<int32_t> _output_shift; + bool _has_biases; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp index 2f3c6f431c..36e9c92c56 100644 --- a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h" +#include "src/core/NEON/kernels/NEDequantizationLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.h b/src/core/NEON/kernels/NEDequantizationLayerKernel.h new file mode 100644 index 0000000000..9cc71922af --- /dev/null +++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H +#define ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the dequantization layer kernel. 
*/ +class NEDequantizationLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEDequantizationLayerKernel"; + } + /** Default constructor */ + NEDequantizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDequantizationLayerKernel(const NEDequantizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDequantizationLayerKernel &operator=(const NEDequantizationLayerKernel &) = delete; + /** Default Move Constructor. */ + NEDequantizationLayerKernel(NEDequantizationLayerKernel &&) = default; + /** Default move assignment operator */ + NEDequantizationLayerKernel &operator=(NEDequantizationLayerKernel &&) = default; + /** Default destructor */ + ~NEDequantizationLayerKernel() = default; + /** Set input, output tensors. + * + * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32. + */ + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEDequantizationLayerKernel + * + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[in] output Output tensor info. Data types supported: F16/F32. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEDerivativeKernel.cpp b/src/core/NEON/kernels/NEDerivativeKernel.cpp index 5d3fc01bd2..8d641a33b9 100644 --- a/src/core/NEON/kernels/NEDerivativeKernel.cpp +++ b/src/core/NEON/kernels/NEDerivativeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h" +#include "src/core/NEON/kernels/NEDerivativeKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEDerivativeKernel.h b/src/core/NEON/kernels/NEDerivativeKernel.h new file mode 100644 index 0000000000..112b2b0b28 --- /dev/null +++ b/src/core/NEON/kernels/NEDerivativeKernel.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEDERIVATIVEKERNEL_H +#define ARM_COMPUTE_NEDERIVATIVEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to run the derivative along the X/Y directions on a tensor. + * + */ +class NEDerivativeKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEDerivativeKernel"; + } + /** Default constructor */ + NEDerivativeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDerivativeKernel(const NEDerivativeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDerivativeKernel &operator=(const NEDerivativeKernel &) = delete; + /** Allow instances of this class to be moved */ + NEDerivativeKernel(NEDerivativeKernel &&) = default; + /** Allow instances of this class to be moved */ + NEDerivativeKernel &operator=(NEDerivativeKernel &&) = default; + /** Default destructor */ + ~NEDerivativeKernel() = default; + /** Initialise the kernel's sources, destination and border + * + * @note At least one of output_x or output_y must be set + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. 
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + /** Function to perform derivative along the X direction on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void derivative_x(const Window &window); + /** Function to perform derivative along the Y direction on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void derivative_y(const Window &window); + /** Function to perform derivative along the X and Y direction on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void derivative_xy(const Window &window); + /** Common signature for all the specialised derivative functions + * + * @param[in] window Region on which to execute the kernel. + */ + using DerivativeFunction = void (NEDerivativeKernel::*)(const Window &window); + /** Derivative function to use for the particular tensor types passed to configure() */ + DerivativeFunction _func; + +private: + const ITensor *_input; /**< Input tensor */ + ITensor *_output_x; /**< Output tensor - Derivate along the X direction */ + ITensor *_output_y; /**< Output tensor - Derivate along the Y direction */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEDERIVATIVEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEDilateKernel.cpp b/src/core/NEON/kernels/NEDilateKernel.cpp index cc781c699f..dc9ec22c71 100644 --- a/src/core/NEON/kernels/NEDilateKernel.cpp +++ b/src/core/NEON/kernels/NEDilateKernel.cpp @@ -21,13 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEDilateKernel.h" +#include "src/core/NEON/kernels/NEDilateKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEDilateKernel.h b/src/core/NEON/kernels/NEDilateKernel.h new file mode 100644 index 0000000000..f1d34318ed --- /dev/null +++ b/src/core/NEON/kernels/NEDilateKernel.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEDILATEKERNEL_H +#define ARM_COMPUTE_NEDILATEKERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform boolean image dilatation */ +class NEDilateKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEDilateKernel"; + } + /** Default constructor */ + NEDilateKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDilateKernel(const NEDilateKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDilateKernel &operator=(const NEDilateKernel &) = delete; + /** Allow instances of this class to be moved */ + NEDilateKernel(NEDilateKernel &&) = default; + /** Allow instances of this class to be moved */ + NEDilateKernel &operator=(NEDilateKernel &&) = default; + /** Default destructor */ + ~NEDilateKernel() = default; + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8 + * @param[out] output Destination tensor. Data type supported: U8 + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEDILATEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp index 56cd6e62d0..87b9fb1bf1 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" +#include "src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" #include "src/core/NEON/kernels/detail/NEDirectConvolutionDetail.h" #include "src/core/NEON/wrapper/wrapper.h" diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h new file mode 100644 index 0000000000..94c97cf521 --- /dev/null +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H +#define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON interface for Direct Convolution Layer kernel */ +class NEDirectConvolutionLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEDirectConvolutionLayerKernel"; + } + /** Default constructor */ + NEDirectConvolutionLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayerKernel(const NEDirectConvolutionLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayerKernel &operator=(const NEDirectConvolutionLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEDirectConvolutionLayerKernel(NEDirectConvolutionLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEDirectConvolutionLayerKernel &operator=(NEDirectConvolutionLayerKernel &&) = default; + /** Default destructor */ + ~NEDirectConvolutionLayerKernel() = default; + /** Set the input, weights, and output tensors. + * + * @note: DirectConvolution only works in the following configurations: + * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 + * 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3 + * + * @param[in] input The input tensor to convolve. 
3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * The 3rd dimension must be the same as the input's volume 3rd dimension. + * Data type supported:Same as @p input. + * @param[out] output Output tensor. + * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: F16/F32 + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + */ + void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info); + /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerKernel + * + * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * The 3rd dimension must be the same as the input's volume 3rd dimension. + * Data type supported:Same as @p input. + * @param[in] output Output tensor. + * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: F16/F32 + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + /* Template function for optimized convolution NHWC */ + template <typename T> + void convolve_nhwc_optimized(const Window &window); + + /* Template function for convolution NHWC */ + template <typename T> + void convolve_nhwc(const Window &window); + + const ITensor *_input; + const ITensor *_weights; + ITensor *_output; + PadStrideInfo _conv_info; + BorderSize _border_size; + unsigned int _kernel_size; + unsigned int _num_weight_elems_read_per_row; + unsigned int _num_elems_read_per_iteration; + unsigned int _num_elems_written_per_iteration; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp index abaaf12e92..de5a88e812 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" +#include "src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h new file mode 100644 index 0000000000..b1b88103bf --- /dev/null +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H +#define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H + +#include "arm_compute/core/KernelDescriptors.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; +/** NEON kernel to accumulate the biases, if provided, or downscale in case of quantized input. + * + * @note We assume bias to be shared + * @note For quantized computations (i.e. @p input of S32 type) the output data type for auto-initialization must be passed as part + * of the @ref DirectConvolutionLayerOutputStageKernelInfo. 
+ */ +class NEDirectConvolutionLayerOutputStageKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEDirectConvolutionLayerOutputStageKernel"; + } + /** Default constructor */ + NEDirectConvolutionLayerOutputStageKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayerOutputStageKernel(const NEDirectConvolutionLayerOutputStageKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayerOutputStageKernel &operator=(const NEDirectConvolutionLayerOutputStageKernel &) = delete; + /** Allow instances of this class to be moved */ + NEDirectConvolutionLayerOutputStageKernel(NEDirectConvolutionLayerOutputStageKernel &&) = default; + /** Allow instances of this class to be moved */ + NEDirectConvolutionLayerOutputStageKernel &operator=(NEDirectConvolutionLayerOutputStageKernel &&) = default; + /** Default destructor */ + ~NEDirectConvolutionLayerOutputStageKernel() = default; + /** Set the accumulate buffer and the biases of the kernel. + * + * @param[in, out] input Input to add the bias to. If @p output is not specified then accumulation is done in-place. + * Data type supported: F16/F32/S32 + * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input + * @param[out] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr) + * Note that in-place computation is only supported for F16/F32. For S32 this must not be nullptr. 
+ * Data type supported: F16/F32 or QASYMM8/QASYMM8_SIGNED if @p input is S32 + * @param[in] info (Optional) DirectConvolutionLayerOutputStageKernel descriptor metadata + */ + void configure(ITensor *input, const ITensor *bias = nullptr, ITensor *output = nullptr, + const DirectConvolutionLayerOutputStageKernelInfo &info = DirectConvolutionLayerOutputStageKernelInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerOutputStageKernel + * + * @param[in] input Input to add the bias to. If @p output is not specified then accumulation is done in-place. + * Data type supported: F16/F32/S32 + * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input + * @param[in] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr) + * Note that in-place computation is only supported for F16/F32. For S32 this must not be nullptr. 
+ * Data type supported: F16/F32 or QASYMM8/QASYMM8_SIGNED if @p input is S32 + * @param[in] info (Optional) DirectConvolutionLayerOutputStageKernel descriptor metadata + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *bias = nullptr, const ITensorInfo *output = nullptr, + const DirectConvolutionLayerOutputStageKernelInfo &info = DirectConvolutionLayerOutputStageKernelInfo()); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + using OutputStageKernel = void(ITensor *input, const ITensor *bias, const Window &window, ITensor *output, + int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias); + +private: + OutputStageKernel *_func; + ITensor *_input; + const ITensor *_bias; + ITensor *_output; + int _result_fixedpoint_multiplier; + int _result_shift; + int _result_offset_after_shift; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp index efe6161096..bb4e9a67b6 100644 --- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp +++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h" +#include "src/core/NEON/kernels/NEElementwiseOperationKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.h b/src/core/NEON/kernels/NEElementwiseOperationKernel.h new file mode 100644 index 0000000000..b0037d357f --- /dev/null +++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.h @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H +#define ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for an element-wise operation kernel + * + * Element-wise operation is computed by: + * @f[ output(x,y) = OP(input1(x,y), input2(x,y))@f] + * + */ +class NEElementwiseOperationKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEElementwiseOperationKernel"; + } + /** Default constructor */ + NEElementwiseOperationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseOperationKernel(const NEElementwiseOperationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseOperationKernel &operator=(const NEElementwiseOperationKernel &) = delete; + /** Allow instances of this class to be moved */ + NEElementwiseOperationKernel(NEElementwiseOperationKernel &&) = default; + /** Allow instances of this class to be moved */ + NEElementwiseOperationKernel &operator=(NEElementwiseOperationKernel &&) = default; + /** Default destructor */ + ~NEElementwiseOperationKernel() = default; + + /** Common signature for all the specialised arithmetic functions + * + * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[out] output Output tensor info. Data types supported: Dependent on subclass. + * @param[in] window Region on which to execute the kernel. 
+ */ + using ElementwiseFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + +protected: + /** Validate the argument passed to the kernel + * + * @param[in] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32. + * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. + * @param[in] output Output tensor. Data types supported: Dependent on subclass. + */ + static Status validate_arguments_common(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); + + /** Common configure function for element-wise operators with no additional options (e.g. Min, Max, SquaredDiff) + * + */ + void configure_common(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); + + /** Function to use for the particular tensor types passed to configure() */ + std::function _function; + + const ITensor *_input1; + const ITensor *_input2; + ITensor *_output; +}; + +class NEArithmeticOperationKernel : public NEElementwiseOperationKernel +{ +public: + /** Default constructor */ + NEArithmeticOperationKernel() = default; + + /** Configure kernel + * + * @param[in] op Arithmetic operation to be executed. + * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[out] output Output tensor info. Data types supported: Same as @p input1. + */ + void configure(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); + + /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel + * + * @param[in] op Arithmetic operation to be executed. + * @param[in] input1 First tensor input info. 
Data types supported: QASYMM8/S16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * + * @return a Status + */ + static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + +protected: + // Inherited methods overridden: + static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); +}; + +class NEDivisionOperationKernel : public NEArithmeticOperationKernel +{ +public: + /** Default constructor */ + NEDivisionOperationKernel() = default; + + /** Configure kernel + * + * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[out] output Output tensor info. Data types supported: Same as @p input1. + */ + void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); + + /** Static function to check if given info will lead to a valid configuration of @ref NEDivisionOperationKernel + * + * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * + * @return a Status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + +protected: + // Inherited methods overridden: + static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); +}; + +class NEPowerOperationKernel : public NEArithmeticOperationKernel +{ +public: + /** Default constructor */ + NEPowerOperationKernel() = default; + + /** Configure kernel + * + * @param[in] input1 First tensor input info. 
Data types supported: F16/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[out] output Output tensor info. Data types supported: Same as @p input1. + */ + void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); + + /** Static function to check if given info will lead to a valid configuration of @ref NEPowerOperationKernel + * + * @param[in] input1 First tensor input info. Data types supported: F16/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * + * @return a Status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + +protected: + // Inherited methods overridden: + static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); +}; + +class NEComparisonOperationKernel : public NEElementwiseOperationKernel +{ +public: + /** Default constructor */ + NEComparisonOperationKernel() = default; + + /** Configure kernel + * + * @param[in] op Comparison operation to be executed. + * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[out] output Output tensor info. Data types supported: U8. + */ + void configure(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); + + /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel + * + * @param[in] op Comparison operation to be executed. + * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. 
+ * @param[in] output Output tensor info. Data types supported: U8. + * + * @return a Status + */ + static Status validate(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + +protected: + // Inherited methods overridden: + static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp b/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp index 8e4b7eda30..d899643fdc 100644 --- a/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp +++ b/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h" +#include "src/core/NEON/kernels/NEElementwiseUnaryKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEElementwiseUnaryKernel.h b/src/core/NEON/kernels/NEElementwiseUnaryKernel.h new file mode 100644 index 0000000000..fcf0aa51c5 --- /dev/null +++ b/src/core/NEON/kernels/NEElementwiseUnaryKernel.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H +#define ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for an element-wise unary operation kernel + * + * Element-wise operation is computed by: + * @f[ output(x) = OP(input(x))@f] + * + */ +class NEElementwiseUnaryKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEElementwiseUnaryKernel"; + } + /** Default constructor */ + NEElementwiseUnaryKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseUnaryKernel(const NEElementwiseUnaryKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseUnaryKernel &operator=(const NEElementwiseUnaryKernel &) = delete; + /** Allow instances of this class to be moved */ + NEElementwiseUnaryKernel(NEElementwiseUnaryKernel &&) = default; + /** Allow instances of this class to be moved */ + NEElementwiseUnaryKernel &operator=(NEElementwiseUnaryKernel &&) = default; + /** Default destructor */ + ~NEElementwiseUnaryKernel() = default; + + /** Function to configure the @ref NEElementwiseUnaryKernel + * + * @param[in] op Arithmetic operation to be executed. + * @param[in] input First tensor input. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations. + * @param[out] output Output tensor. 
Data types supported: Same as @p input. + */ + void configure(ElementWiseUnary op, const ITensor *input, ITensor *output); + + /** Static function to check if given info will lead to a valid configuration of @ref NEElementwiseUnaryKernel + * + * @param[in] op Arithmetic operation to be executed. + * @param[in] input First tensor input info. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations. + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * + * @return a Status + */ + static Status validate(ElementWiseUnary op, const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialised arithmetic functions + * + * @param[in] window Region on which to execute the kernel. + */ + using ElementwiseUnaryPtr = void (NEElementwiseUnaryKernel::*)(const Window &window); + + /** Template function to run elementwise unary operation + * + * @tparam ScalarType Scalar datatype + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void elementwise_op(const Window &window); + + ElementwiseUnaryPtr _func; + const ITensor *_input; + ITensor *_output; + ElementWiseUnary _op; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H */ diff --git a/src/core/NEON/kernels/NEErodeKernel.cpp b/src/core/NEON/kernels/NEErodeKernel.cpp index 31b0f487d6..171a6c828f 100644 --- a/src/core/NEON/kernels/NEErodeKernel.cpp +++ b/src/core/NEON/kernels/NEErodeKernel.cpp @@ -21,13 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEErodeKernel.h" +#include "src/core/NEON/kernels/NEErodeKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEErodeKernel.h b/src/core/NEON/kernels/NEErodeKernel.h new file mode 100644 index 0000000000..54f286780b --- /dev/null +++ b/src/core/NEON/kernels/NEErodeKernel.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEERODEKERNEL_H +#define ARM_COMPUTE_NEERODEKERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform boolean image erosion */ +class NEErodeKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEErodeKernel"; + } + /** Default constructor */ + NEErodeKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEErodeKernel(const NEErodeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEErodeKernel &operator=(const NEErodeKernel &) = delete; + /** Allow instances of this class to be moved */ + NEErodeKernel(NEErodeKernel &&) = default; + /** Allow instances of this class to be moved */ + NEErodeKernel &operator=(NEErodeKernel &&) = default; + /** Default destructor */ + ~NEErodeKernel() = default; + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8 + * @param[out] output Destination tensor. Data type supported: U8 + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEERODEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp b/src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp index d8036f2f60..200ee6bf88 100644 --- a/src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp +++ b/src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h" +#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" diff --git a/src/core/NEON/kernels/NEFFTDigitReverseKernel.h b/src/core/NEON/kernels/NEFFTDigitReverseKernel.h new file mode 100644 index 0000000000..f436c364b2 --- /dev/null +++ b/src/core/NEON/kernels/NEFFTDigitReverseKernel.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H +#define ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H + +#include "arm_compute/core/KernelDescriptors.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the digit reverse operation kernel. 
*/ +class NEFFTDigitReverseKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEFFTDigitReverseKernel"; + } + /** Constructor */ + NEFFTDigitReverseKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFTDigitReverseKernel(const NEFFTDigitReverseKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFTDigitReverseKernel &operator=(const NEFFTDigitReverseKernel &) = delete; + /** Default Move Constructor. */ + NEFFTDigitReverseKernel(NEFFTDigitReverseKernel &&) = default; + /** Default move assignment operator */ + NEFFTDigitReverseKernel &operator=(NEFFTDigitReverseKernel &&) = default; + /** Default destructor */ + ~NEFFTDigitReverseKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor). + * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: 2 (complex tensor). + * @param[in] idx Digit reverse index tensor. Data type supported: U32 + * @param[in] config Kernel configuration. + */ + void configure(const ITensor *input, ITensor *output, const ITensor *idx, const FFTDigitReverseKernelInfo &config); + + /** Static function to check if given info will lead to a valid configuration of @ref NEFFTDigitReverseKernel + * + * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor). + * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: 2 (complex tensor). + * @param[in] idx Digit reverse index tensor info. 
Data type supported: U32 + * @param[in] config Kernel configuration + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + using NEFFTDigitReverseKernelFunctionPtr = void (NEFFTDigitReverseKernel::*)(const Window &window); + + template + void digit_reverse_kernel_axis_0(const Window &window); + + template + void digit_reverse_kernel_axis_1(const Window &window); + + NEFFTDigitReverseKernelFunctionPtr _func; + const ITensor *_input; + ITensor *_output; + const ITensor *_idx; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEFFTRadixStageKernel.cpp b/src/core/NEON/kernels/NEFFTRadixStageKernel.cpp index 1b0af488a2..cb1391ab4e 100644 --- a/src/core/NEON/kernels/NEFFTRadixStageKernel.cpp +++ b/src/core/NEON/kernels/NEFFTRadixStageKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h" +#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" diff --git a/src/core/NEON/kernels/NEFFTRadixStageKernel.h b/src/core/NEON/kernels/NEFFTRadixStageKernel.h new file mode 100644 index 0000000000..8a695b790f --- /dev/null +++ b/src/core/NEON/kernels/NEFFTRadixStageKernel.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H +#define ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H + +#include "arm_compute/core/KernelDescriptors.h" +#include "src/core/NEON/INEKernel.h" + +#include +#include + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the FFT kernel. */ +class NEFFTRadixStageKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEFFTRadixStageKernel"; + } + /** Constructor */ + NEFFTRadixStageKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFTRadixStageKernel(const NEFFTRadixStageKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFTRadixStageKernel &operator=(const NEFFTRadixStageKernel &) = delete; + /** Default Move Constructor. 
*/ + NEFFTRadixStageKernel(NEFFTRadixStageKernel &&) = default; + /** Default move assignment operator */ + NEFFTRadixStageKernel &operator=(NEFFTRadixStageKernel &&) = default; + /** Default destructor */ + ~NEFFTRadixStageKernel() = default; + /** Set the input and output tensors. + * + * @note If the output tensor is nullptr, the FFT will be performed in-place + * + * @param[in,out] input Source tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor). + * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: same as @p input. + * @param[in] config FFT descriptor metadata. + */ + void configure(ITensor *input, ITensor *output, const FFTRadixStageKernelInfo &config); + /** Static function to check if given info will lead to a valid configuration of @ref NEFFTRadixStageKernel + * + * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor). + * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: same as @p input. + * @param[in] config FFT descriptor metadata. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelInfo &config); + /** Returns the radix that are support by the FFT kernel + * + * @return A set of supported radix + */ + static std::set supported_radix(); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + ITensor *_input; + ITensor *_output; + bool _run_in_place; + unsigned int _Nx; + unsigned int _axis; + unsigned int _radix; + + void set_radix_stage_axis0(const FFTRadixStageKernelInfo &config); + void set_radix_stage_axis1(const FFTRadixStageKernelInfo &config); + + using FFTFunctionPointerAxis0 = std::function; + using FFTFunctionPointerAxis1 = std::function; + + FFTFunctionPointerAxis0 _func_0; + FFTFunctionPointerAxis1 _func_1; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEFFTScaleKernel.cpp b/src/core/NEON/kernels/NEFFTScaleKernel.cpp index 0cb8b84db8..6dc5541e94 100644 --- a/src/core/NEON/kernels/NEFFTScaleKernel.cpp +++ b/src/core/NEON/kernels/NEFFTScaleKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEFFTScaleKernel.h" +#include "src/core/NEON/kernels/NEFFTScaleKernel.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" diff --git a/src/core/NEON/kernels/NEFFTScaleKernel.h b/src/core/NEON/kernels/NEFFTScaleKernel.h new file mode 100644 index 0000000000..24a19f98ba --- /dev/null +++ b/src/core/NEON/kernels/NEFFTScaleKernel.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEFFTSCALEKERNEL_H +#define ARM_COMPUTE_NEFFTSCALEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +#include "arm_compute/core/KernelDescriptors.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the inverse fft scale kernel. */ +class NEFFTScaleKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEFFTScaleKernel"; + } + /** Constructor */ + NEFFTScaleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFTScaleKernel(const NEFFTScaleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFTScaleKernel &operator=(const NEFFTScaleKernel &) = delete; + /** Default Move Constructor. 
*/ + NEFFTScaleKernel(NEFFTScaleKernel &&) = default; + /** Default move assignment operator */ + NEFFTScaleKernel &operator=(NEFFTScaleKernel &&) = default; + /** Default destructor */ + ~NEFFTScaleKernel() = default; + /** Set the input and output tensors. + * + * @param[in,out] input Source tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor). + * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: 1 (real tensor) or 2 (complex tensor). + * @param[in] config Kernel configuration + */ + void configure(ITensor *input, ITensor *output, const FFTScaleKernelInfo &config); + /** Static function to check if given info will lead to a valid configuration of @ref NEFFTScaleKernel + * + * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor). + * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: 1 (real tensor) or 2 (complex tensor). + * @param[in] config Kernel configuration + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTScaleKernelInfo &config); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + ITensor *_input; + ITensor *_output; + float _scale; + bool _run_in_place; + bool _is_conj; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEFFTSCALEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEFastCornersKernel.cpp b/src/core/NEON/kernels/NEFastCornersKernel.cpp index 99312f5134..c9280d8dc0 100644 --- a/src/core/NEON/kernels/NEFastCornersKernel.cpp +++ b/src/core/NEON/kernels/NEFastCornersKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h" +#include "src/core/NEON/kernels/NEFastCornersKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NEFastCornersKernel.h b/src/core/NEON/kernels/NEFastCornersKernel.h new file mode 100644 index 0000000000..a4086afb0c --- /dev/null +++ b/src/core/NEON/kernels/NEFastCornersKernel.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEFASTCORNERSKERNEL_H +#define ARM_COMPUTE_NEFASTCORNERSKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +#include + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** NEON kernel to perform fast corners */ +class NEFastCornersKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEFastCornersKernel"; + } + /** Constructor */ + NEFastCornersKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFastCornersKernel(const NEFastCornersKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFastCornersKernel &operator=(const NEFastCornersKernel &) = delete; + /** Allow instances of this class to be moved */ + NEFastCornersKernel(NEFastCornersKernel &&) = default; + /** Allow instances of this class to be moved */ + NEFastCornersKernel &operator=(NEFastCornersKernel &&) = default; + /** Default destructor */ + ~NEFastCornersKernel() = default; + /** Initialise the kernel. + * + * @param[in] input Source image. Data type supported: U8. + * @param[out] output Output image. Data type supported: U8. + * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. + * @param[in] non_max_suppression True if non-maxima suppresion is applied, false otherwise. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const IImage *input, IImage *output, uint8_t threshold, bool non_max_suppression, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + const IImage *_input; /**< source image */ + IImage *_output; /**< inermediate results */ + uint8_t _threshold; /**< threshold on difference between intensity */ + bool _non_max_suppression; /** true if non-maxima suppression is applied in the next stage */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEFASTCORNERSKERNEL_H */ diff --git a/src/core/NEON/kernels/NEFillArrayKernel.cpp b/src/core/NEON/kernels/NEFillArrayKernel.cpp index 93798db6c3..e8ae926fbf 100644 --- a/src/core/NEON/kernels/NEFillArrayKernel.cpp +++ b/src/core/NEON/kernels/NEFillArrayKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h" +#include "src/core/NEON/kernels/NEFillArrayKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NEFillArrayKernel.h b/src/core/NEON/kernels/NEFillArrayKernel.h new file mode 100644 index 0000000000..c9841679d1 --- /dev/null +++ b/src/core/NEON/kernels/NEFillArrayKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEFILLARRAYKERNEL_H +#define ARM_COMPUTE_NEFILLARRAYKERNEL_H + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +#include + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** This kernel adds all texels greater than or equal to the threshold value to the keypoint array. 
*/ +class NEFillArrayKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEFillArrayKernel"; + } + /** Default contructor */ + NEFillArrayKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFillArrayKernel(const NEFillArrayKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFillArrayKernel &operator=(const NEFillArrayKernel &) = delete; + /** Allow instances of this class to be moved */ + NEFillArrayKernel(NEFillArrayKernel &&) = default; + /** Allow instances of this class to be moved */ + NEFillArrayKernel &operator=(NEFillArrayKernel &&) = default; + /** Default detructor */ + ~NEFillArrayKernel() = default; + + /** Initialise the kernel. + * + * @param[in] input Source image. Data type supported: U8. + * @param[in] threshold Texels greater than the threshold will be added to the array. + * @param[out] output Arrays of keypoints to store the results. + */ + void configure(const IImage *input, uint8_t threshold, IKeyPointArray *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + bool is_parallelisable() const override; + +private: + const IImage *_input; + IKeyPointArray *_output; + uint8_t _threshold; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEFILLARRAYKERNEL_H*/ diff --git a/src/core/NEON/kernels/NEFillBorderKernel.cpp b/src/core/NEON/kernels/NEFillBorderKernel.cpp index c1dd5cf81f..488079062b 100644 --- a/src/core/NEON/kernels/NEFillBorderKernel.cpp +++ b/src/core/NEON/kernels/NEFillBorderKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" @@ -30,6 +30,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" #include "src/core/helpers/WindowHelpers.h" #include diff --git a/src/core/NEON/kernels/NEFillBorderKernel.h b/src/core/NEON/kernels/NEFillBorderKernel.h new file mode 100644 index 0000000000..65908bebee --- /dev/null +++ b/src/core/NEON/kernels/NEFillBorderKernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEFILLBORDERKERNEL_H +#define ARM_COMPUTE_NEFILLBORDERKERNEL_H + +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the kernel to fill borders */ +class NEFillBorderKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEFillBorderKernel"; + } + /** Default Constructor */ + NEFillBorderKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFillBorderKernel(const NEFillBorderKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFillBorderKernel &operator=(const NEFillBorderKernel &) = delete; + /** Allow instances of this class to be moved */ + NEFillBorderKernel(NEFillBorderKernel &&) = default; + /** Allow instances of this class to be moved */ + NEFillBorderKernel &operator=(NEFillBorderKernel &&) = default; + /** Default destructor */ + ~NEFillBorderKernel() = default; + + /** Initialise the function. + * + * @note This kernel fills the borders within the XY-planes. + * + * @param[in,out] tensor Tensor to process. Data types supported: All. + * @param[in] border_size Size of the border to fill in elements. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ * + */ + void configure(ITensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + void fill_replicate_single_channel(const Window &window); + void fill_constant_value_single_channel(const Window &window); + + ITensor *_tensor; + BorderSize _border_size; + BorderMode _mode; + PixelValue _constant_border_value; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEFILLBORDERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEFlattenLayerKernel.cpp b/src/core/NEON/kernels/NEFlattenLayerKernel.cpp index e6b34b6165..8c0dc10ee8 100644 --- a/src/core/NEON/kernels/NEFlattenLayerKernel.cpp +++ b/src/core/NEON/kernels/NEFlattenLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h" +#include "src/core/NEON/kernels/NEFlattenLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEFlattenLayerKernel.h b/src/core/NEON/kernels/NEFlattenLayerKernel.h new file mode 100644 index 0000000000..5fd5f436b2 --- /dev/null +++ b/src/core/NEON/kernels/NEFlattenLayerKernel.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEFLATTENLAYERKERNEL_H +#define ARM_COMPUTE_NEFLATTENLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the flatten layer kernel. 
*/ +class NEFlattenLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEFlattenLayerKernel"; + } + /** Default constructor */ + NEFlattenLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFlattenLayerKernel(const NEFlattenLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFlattenLayerKernel &operator=(const NEFlattenLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEFlattenLayerKernel(NEFlattenLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEFlattenLayerKernel &operator=(NEFlattenLayerKernel &&) = default; + /** Default destructor */ + ~NEFlattenLayerKernel() = default; + + /** Set the input and output of the kernel. + * + * @param[in] input First input tensor to flatten with at least 3 dimensions. + * The dimensions above the third will be interpreted as batches. Data types supported: All + * @param[out] output Output tensor with shape [w*h*d, input_batches] where: + * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input + */ + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEFlattenLayerKernel + * + * @param[in] input First input tensor to flatten with at least 3 dimensions. + * The dimensions above the third will be interpreted as batches. Data types supported: All + * @param[out] output Output tensor with shape [w*h*d, input_batches] where: + * w = width input tensor, h = height input tensor and d = depth input tensor. 
Data type supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEFLATTENLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEFloorKernel.cpp b/src/core/NEON/kernels/NEFloorKernel.cpp index 48f964c6a2..2750acdda7 100644 --- a/src/core/NEON/kernels/NEFloorKernel.cpp +++ b/src/core/NEON/kernels/NEFloorKernel.cpp @@ -21,14 +21,14 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEFloorKernel.h" +#include "src/core/NEON/kernels/NEFloorKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Validate.h" #include "src/core/CPP/Validate.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEFloorKernel.h b/src/core/NEON/kernels/NEFloorKernel.h new file mode 100644 index 0000000000..99c016bac5 --- /dev/null +++ b/src/core/NEON/kernels/NEFloorKernel.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEFLOORKERNEL_H +#define ARM_COMPUTE_NEFLOORKERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a floor operation */ +class NEFloorKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEFloorKernel"; + } + /** Constructor */ + NEFloorKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFloorKernel(const NEFloorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFloorKernel &operator=(const NEFloorKernel &) = delete; + /** Allow instances of this class to be moved */ + NEFloorKernel(NEFloorKernel &&) = default; + /** Allow instances of this class to be moved */ + NEFloorKernel &operator=(NEFloorKernel &&) = default; + /** Default destructor */ + ~NEFloorKernel() = default; + /** Set the source, destination of the kernel + * + * @param[in] input Source tensor. Data type supported: F16/F32. + * @param[out] output Destination tensor. Same as @p input + */ + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEFloorKernel + * + * @param[in] input Source tensor info. Data type supported: F16/F32. + * @param[in] output Destination tensor info. 
Same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEFLOORKERNEL_H */ diff --git a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp index e353df1c39..99f830fe06 100644 --- a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h" +#include "src/core/NEON/kernels/NEFuseBatchNormalizationKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.h b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.h new file mode 100644 index 0000000000..ee767b01c8 --- /dev/null +++ b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H +#define ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** OpenNE kernel to fuse the batch normalization node to a preceding convolution node */ +class NEFuseBatchNormalizationKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEFuseBatchNormalizationKernel"; + } + /** Default constructor */ + NEFuseBatchNormalizationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFuseBatchNormalizationKernel(const NEFuseBatchNormalizationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFuseBatchNormalizationKernel &operator=(const NEFuseBatchNormalizationKernel &) = delete; + /** Allow instances of this class to be moved */ + NEFuseBatchNormalizationKernel(NEFuseBatchNormalizationKernel &&) = default; + /** Allow instances of this class to be moved */ + NEFuseBatchNormalizationKernel &operator=(NEFuseBatchNormalizationKernel &&) = default; + /** Default destructor */ + ~NEFuseBatchNormalizationKernel() = default; + /** Set the source, destination of the kernel + * + * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. 
Data layout supported: NCHW, NHWC + * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights + * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights + * @param[out] fused_weights (Optional) Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights + * @param[out] fused_bias (Optional) Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights + * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights + * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights + * @note if nullptr, bn_beta is set to 0.0 + * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights + * @note if nullptr, bn_gamma is set to 1.0 + * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. + * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. + */ + void configure(const ITensor *input_weights, const ITensor *bn_mean, const ITensor *bn_var, ITensor *fused_weights, ITensor *fused_bias, + const ITensor *input_bias = nullptr, const ITensor *bn_beta = nullptr, const ITensor *bn_gamma = nullptr, + float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); + /** Static function to check if given info will lead to a valid configuration of @ref NEFuseBatchNormalizationKernel + * + * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. 
Data layout supported: NCHW, NHWC + * @param[in] bn_mean Batch normalization layer mean tensor info. Same as @p input_weights + * @param[in] bn_var Batch normalization layer variance tensor info. Same as @p input_weights + * @param[in] fused_weights (Optional) Output fused weights tensor info. It can be a nullptr in case of in-place computation. Same as @p input_weights + * @param[in] fused_bias (Optional) Output fused bias tensor info. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights + * @param[in] input_bias (Optional) Input bias tensor info for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights + * @param[in] bn_beta (Optional) Batch normalization layer beta tensor info. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights + * @note if nullptr, bn_beta is set to 0.0 + * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor info. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights + * @note if nullptr, bn_gamma is set to 1.0 + * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. + * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var, + const ITensorInfo *fused_weights, const ITensorInfo *fused_bias, + const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr, + float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input_weights; + const ITensor *_input_bias; + const ITensor *_bn_mean; + const ITensor *_bn_var; + const ITensor *_bn_gamma; + const ITensor *_bn_beta; + ITensor *_fused_weights; + ITensor *_fused_bias; + float _epsilon; + bool _run_in_place_weights; + bool _run_in_place_bias; + + using FuseBatchNormFunction = void(const ITensor *input_weights, const ITensor *input_bias, ITensor *fused_weights, ITensor *fused_bias, + const ITensor *bn_mean, const ITensor *bn_var, const ITensor *bn_beta, const ITensor *bn_gamma, float epsilon, const Window &window); + + FuseBatchNormFunction *_func; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h b/src/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h new file mode 100644 index 0000000000..775a2c06ab --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEGEMMASSEMBLYBASE_H +#define ARM_COMPUTE_NEGEMMASSEMBLYBASE_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Base class for GEMM NEON kernels implemented in Assembly. 
*/ +class NEGEMMAssemblyBaseKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEGEMMAssemblyBaseKernel"; + } + /** Constructor */ + NEGEMMAssemblyBaseKernel() + : _input0(nullptr), _input1(nullptr), _output(nullptr), _workspace(nullptr), _alpha(1.f), _beta(0.f), _is_transposed_0(false), _is_transposed_1(false) + { + } + + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMAssemblyBaseKernel(const NEGEMMAssemblyBaseKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMAssemblyBaseKernel &operator=(const NEGEMMAssemblyBaseKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMAssemblyBaseKernel(NEGEMMAssemblyBaseKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMAssemblyBaseKernel &operator=(NEGEMMAssemblyBaseKernel &&) = default; + + virtual ~NEGEMMAssemblyBaseKernel() = default; + + /** Initialise the kernel's input and output. + * + * The computed function is C = a * AxB + b * C. + * + * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F32 + * @param[in] input1 Input tensor containing the Matrix B. Data types supported: same as @p input0 + * @param[in,out] output Output tensor to store the result of matrix multiplication. If @p beta is not zero the values are multiplied by @p beta before the result is accumulated. Otherwise the values are overwritten by the result. Data types supported: same as @p input0. + * @param[out] workspace Space for intermediate results. + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of the accumulation. + * @param[in] is_transposed_0 (Optional)True if @p input0 is transposed else false. (Defaults to false) + * @param[in] is_transposed_1 (Optional)True if @p input1 is transposed else false. 
(Defaults to false) + */ + void configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha = 1.f, float beta = 0.f, bool is_transposed_0 = false, bool is_transposed_1 = false) + { + internal_configure(input0, input1, output, workspace, alpha, beta, is_transposed_0, is_transposed_1); + } + +protected: + virtual void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool _is_transposed_0, bool _is_transposed_1) = 0; + + const ITensor *_input0; + const ITensor *_input1; + ITensor *_output; + ITensor *_workspace; + float _alpha; + float _beta; + bool _is_transposed_0; + bool _is_transposed_1; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEGEMMASSEMBLYBASE_H*/ diff --git a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp index 2997c1d003..5d178ea85b 100644 --- a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp +++ b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp @@ -21,16 +21,16 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h new file mode 100644 index 0000000000..85939ebae9 --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H +#define ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to interleave the elements of a matrix + * + * This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values) + * + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccccccccccc} + * a00 & a10 & a20 & a30 & a01 & a11 & a21 & a31 & a02 & a12 & a22 & a32 & a03 & a13 & a23 & a33 \\ + * \end{array} \right) + * @f] + * + * After this operation, the output matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ] + */ +class NEGEMMInterleave4x4Kernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEGEMMInterleave4x4Kernel"; + } + /** Constructor */ + NEGEMMInterleave4x4Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMInterleave4x4Kernel(const NEGEMMInterleave4x4Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMInterleave4x4Kernel &operator=(const NEGEMMInterleave4x4Kernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMInterleave4x4Kernel(NEGEMMInterleave4x4Kernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMInterleave4x4Kernel &operator=(NEGEMMInterleave4x4Kernel &&) = default; + /** Default destructor */ + ~NEGEMMInterleave4x4Kernel() = default; + /** 
Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor which stores the interleaved matrix. Data type supported: same as @p input. + */ + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMInterleave4x4Kernel + * + * @param[in] input Input tensor info. Data types supported: All + * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Template function to run gemm interleave 4x4 + * + * @tparam ScalarType Scalar datatype + * + * @param[in] input Input tensor. Data types supported: uint32_t, uint16_t and uint8_t + * @param[out] output Output tensor. Data types supported: uint32_t, uint16_t and uint8_t + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void gemm_interleave4x4(const ITensor *input, ITensor *output, const Window &window); + + /** Common signature for all the specialised gemm interleave 4x4 functions + * + * @param[in] input Input tensor. Data types supported: uint32_t, uint16_t and uint8_t + * @param[out] output Output tensor. Data types supported: uint32_t, uint16_t and uint8_t + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). 
+ */ + using GEMMInterleaveFunctionFuncPtr = void (NEGEMMInterleave4x4Kernel::*)(const ITensor *input, ITensor *output, const Window &window); + + GEMMInterleaveFunctionFuncPtr _func; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H*/ diff --git a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp index acc519012b..4dbfc3b022 100644 --- a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h new file mode 100644 index 0000000000..14d03fe3eb --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H +#define ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to multiply matrices + * + * @note @ref NEGEMMLowpMatrixMultiplyKernel low precision matrix product kernel + * This kernel performs the following computation: + * + * -# Convert a values from int8 to int32 + * -# Convert b values from int8 to int32 + * -# Compute the int32 matrix product of the resulting a * b and store the result as int32 + * + */ +class NEGEMMLowpMatrixMultiplyKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEGEMMLowpMatrixMultiplyKernel"; + } + /** Constructor */ + NEGEMMLowpMatrixMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpMatrixMultiplyKernel(const NEGEMMLowpMatrixMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpMatrixMultiplyKernel &operator=(const NEGEMMLowpMatrixMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMLowpMatrixMultiplyKernel(NEGEMMLowpMatrixMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMLowpMatrixMultiplyKernel &operator=(NEGEMMLowpMatrixMultiplyKernel &&) = default; + /** Default destructor */ + ~NEGEMMLowpMatrixMultiplyKernel() = default; + /** Initialise 
the kernel's input and output. + * + * The input matrices @p input0 and @p input1 must be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel. These two + * kernels change the layout of the original matrices to be more cache-friendly. + * + * @param[in] input0 Input tensor containing the interleaved Matrix A. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED + * @param[in] input1 Input tensor containing the transposed1xW Matrix B. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 + */ + void configure(const ITensor *input0, const ITensor *input1, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixMultiplyKernel + * + * @param[in] input0 Input tensor info containing the interleaved Matrix A. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED + * @param[in] input1 Input tensor info containing the transposed Matrix B. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL + * @param[in] output Output tensor info to store the result of matrix multiplication. 
Data type supported: S32 + * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input0; + const ITensor *_input1; + ITensor *_output; + bool _slide_matrix_b; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H*/ diff --git a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp index 1c76926546..174a06955f 100644 --- a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h new file mode 100644 index 0000000000..0f37e584b9 --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H +#define ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel used to add the offset contribution after @ref NEGEMMLowpMatrixMultiplyKernel. The computation is performed in-place + * + * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), + * and adds to it the offset contribution of matrix A and matrix B in-place. 
+ * + * The final result is: + * + * mm_result[i][k] = mm_result[i][k] + + * (vector_sum_col[k] * a_offset) + + * (vector_sum_row[i] * b_offset) + + * (a_offset * b_offset * k) + * + */ +class NEGEMMLowpOffsetContributionKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEGEMMLowpOffsetContributionKernel"; + } + /** Constructor */ + NEGEMMLowpOffsetContributionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpOffsetContributionKernel(const NEGEMMLowpOffsetContributionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpOffsetContributionKernel &operator=(const NEGEMMLowpOffsetContributionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMLowpOffsetContributionKernel(NEGEMMLowpOffsetContributionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMLowpOffsetContributionKernel &operator=(NEGEMMLowpOffsetContributionKernel &&) = default; + /** Default destructor */ + ~NEGEMMLowpOffsetContributionKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in, out] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32 + * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result + * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result + * @param[in] k Number of matrix A columns or Matrix B rows + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. 
+ */ + void configure(ITensor *mm_result, const ITensor *vector_sum_col, const ITensor *vector_sum_row, int32_t k, int32_t a_offset, int32_t b_offset); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpOffsetContributionKernel + * + * @param[in] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32 + * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result + * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + * + * @return a status + */ + static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, int32_t a_offset, int32_t b_offset); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_vector_sum_col; + const ITensor *_vector_sum_row; + ITensor *_mm_result; + int32_t _a_offset; + int32_t _b_offset; + int32_t _k_offset; + bool _slide_vector_sum_col; +}; +} // namespace arm_compute + +#endif /* ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp index 6a7d225167..3c8f5ae022 100644 --- a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 
IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h new file mode 100644 index 0000000000..4c68fb0943 --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H +#define ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel used to add the offset contribution and perform the output stage after @ref NEGEMMLowpMatrixMultiplyKernel. + * + * The computation is performed in-place + * + * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), + * and adds to it the offset contribution of matrix A and matrix B in-place. + * + * The output stage can perform either QuantizeDownInt32ToUint8Scale or QuantizeDownInt32ToUint8ScaleByFixedPoint for Uint8. + * The output stage can perform either QuantizeDownInt32ToInt8Scale or QuantizeDownInt32ToInt8ScaleByFixedPoint for Int8. + * + * For QuantizeDownInt32ToUint8Scale/QuantizeDownInt32ToInt8Scale the final result is: + * + * ((mm_result'[i][k] + result_offset) * result_mult_int) >> result_shift + * + * For QuantizeDownInt32ToUint8ScaleByFixedPoint/QuantizeDownInt32ToInt8ScaleByFixedPoint the final result is: + * + * (FixedPointMul(mm_result'[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift + * + * where FixedPointMul(x, y) is the nearest integer to the following + * mathematical expression, evaluated without overflow or intermediate rounding: + * + * (x * y) / 2^31 + * + * and mm_result'[i][k] = mm_result[i][k] + + * (vector_sum_col[k] * a_offset) + + * (vector_sum_row[i] * b_offset) + + * (a_offset * b_offset * k) + */ + +class NEGEMMLowpOffsetContributionOutputStageKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEGEMMLowpOffsetContributionOutputStageKernel"; + } + /** Constructor */ + NEGEMMLowpOffsetContributionOutputStageKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpOffsetContributionOutputStageKernel(const 
NEGEMMLowpOffsetContributionOutputStageKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpOffsetContributionOutputStageKernel &operator=(const NEGEMMLowpOffsetContributionOutputStageKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMLowpOffsetContributionOutputStageKernel(NEGEMMLowpOffsetContributionOutputStageKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMLowpOffsetContributionOutputStageKernel &operator=(NEGEMMLowpOffsetContributionOutputStageKernel &&) = default; + /** Default destructor */ + ~NEGEMMLowpOffsetContributionOutputStageKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32 + * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result + * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p mm_result. + * @param[out] output Output tensor containing the final quantized result. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] k Number of matrix A columns or Matrix B rows + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + * @param[in] output_stage GEMMLowp output stage info, providing the type of quantization and the necessary parameters. 
+ */ + void configure(const ITensor *mm_result, const ITensor *vector_sum_col, const ITensor *vector_sum_row, const ITensor *bias, ITensor *output, int32_t k, int32_t a_offset, int32_t b_offset, + GEMMLowpOutputStageInfo output_stage); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpOffsetContributionOutputStageKernel + * + * @param[in] mm_result Input tensor info containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32 + * @param[in] vector_sum_col Tensor info for the input row-vector of sums of all the entries in each column of matrix B. + * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result + * @param[in] vector_sum_row Tensor info for the input row-vector of sums of all the entries in each row of matrix A. + * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result + * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p mm_result. + * @param[in] output Output tensor info containing the final quantized result. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + * @param[in] output_stage GEMMLowp output stage info, providing the type of quantization and the necessary parameters. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, const ITensorInfo *output, int32_t a_offset, + int32_t b_offset, + GEMMLowpOutputStageInfo output_stage); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Function to use for the particular tensors passed to configure() */ + const ITensor *_vector_sum_col; + const ITensor *_vector_sum_row; + const ITensor *_bias; + const ITensor *_mm_result; + ITensor *_output; + int32_t _a_offset; + int32_t _b_offset; + int32_t _k_offset; + bool _slide_vector_sum_col; + GEMMLowpOutputStageInfo _output_stage; +}; +} // namespace arm_compute + +#endif /* ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp index 659c4105c1..2e78107a1a 100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h new file mode 100644 index 0000000000..42ef570f77 --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H +#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED + * + * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value. 
+ * The following computations will be performed by the kernel: + * + * -# Add offset terms to final result + * -# Multiply each entry of result by result_mult_int + * -# Add bias to final result if bias tensor is not a nullptr + * -# Shift the int32 accumulator by result_shift + * -# Clamp the value between the specified min and max bounds + * -# Clamp the resulting int32 values: + * -# -to the [0..255] range and cast to QASYMM8. + * -# -to the [-128..127] range and cast to QASYMM8_SIGNED. + * + */ +class NEGEMMLowpQuantizeDownInt32ScaleKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEGEMMLowpQuantizeDownInt32ScaleKernel"; + } + /** Constructor */ + NEGEMMLowpQuantizeDownInt32ScaleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpQuantizeDownInt32ScaleKernel(const NEGEMMLowpQuantizeDownInt32ScaleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpQuantizeDownInt32ScaleKernel &operator=(const NEGEMMLowpQuantizeDownInt32ScaleKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMLowpQuantizeDownInt32ScaleKernel(NEGEMMLowpQuantizeDownInt32ScaleKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMLowpQuantizeDownInt32ScaleKernel &operator=(NEGEMMLowpQuantizeDownInt32ScaleKernel &&) = default; + /** Default destructor */ + ~NEGEMMLowpQuantizeDownInt32ScaleKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. 
Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] output_stage GEMMLowp output stage metadata. + */ + void configure(const ITensor *input, const ITensor *bias, ITensor *output, const GEMMLowpOutputStageInfo *output_stage); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ScaleKernel + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED + * @param[in] output_stage GEMMLowp output stage metadata. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Template function to run the NEGEMMLowpQuantizeDownInt32ScaleKernel + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void run(const Window &window); + + /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ScaleKernel functions + * + * @param[in] window Region on which to execute the kernel. 
+ */ + using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ScaleKernel::*)(const Window &window); + + QuantizeDownFunctionPtr _func; + const ITensor *_input; + const ITensor *_bias; + ITensor *_output; + const GEMMLowpOutputStageInfo *_output_stage; + bool _is_bounded_relu; +}; +} // namespace arm_compute + +#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp index afa8cec76f..1fafc62302 100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h new file mode 100644 index 0000000000..d04e713cb8 --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H +#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QSYMM16 + * + * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QSYMM16 value. 
+ * The following computations will be performed by the kernel: + * + * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier + * -# Add bias to final result if bias tensor is not a nullptr + * -# Round to nearest division by a power-of-two using result_shift + * -# Clamp the value between the specified min and max bounds + * -# Clamp the resulting int32 values to the [-32768, 32767] range and cast to QSYMM16. + * + */ +class NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel"; + } + /** Constructor */ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &&) = default; + /** Default destructor */ + ~NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. 
Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QSYMM16 + * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added + * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication + * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0. + * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16. + * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0. + */ + void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel + * + * @param[in] input Input tensor info. Data type supported: S32 + * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor info with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] output Output tensor info. Data type supported: QSYMM16 + * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0. + * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16, + * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Template function to run the NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void run(const Window &window); + + /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel functions + * + * @param[in] window Region on which to execute the kernel. + */ + using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::*)(const Window &window); + + QuantizeDownFunctionPtr _func; + const ITensor *_input; + const ITensor *_bias; + ITensor *_output; + int _result_fixedpoint_multiplier; + int _result_shift; + int _min; + int _max; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp index 83416e03e9..bf9ce9554d 100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h new file mode 100644 index 0000000000..55c07fbb5a --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H +#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8_SIGNED + * + * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8_SIGNED value. + * The following computations will be performed by the kernel: + * + * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier + * -# Add bias to final result if bias tensor is not a nullptr + * -# Round to nearest division by a power-of-two using result_shift + * -# Add offset to each result + * -# Clamp the value between the specified min and max bounds + * -# Clamp the resulting int32 values to the [-128..127] range and cast to QASYMM8_SIGNED. 
+ * + */ +class NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel"; + } + /** Constructor */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &&) = default; + /** Default destructor */ + ~NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. 
Data type supported: QASYMM8_SIGNED + * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added + * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication + * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED + * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED + * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED, + * Along with @p min, this value can be used to implement "rectified linear unit" activation functions + */ + void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] output Output tensor. 
Data type supported: QASYMM8_SIGNED + * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED + * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED, + * Along with @p min, this value can be used to implement "rectified linear unit" activation functions + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Template function to run the NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void run(const Window &window); + + /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel functions + * + * @param[in] window Region on which to execute the kernel. 
+ */ + using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::*)(const Window &window); + + QuantizeDownFunctionPtr _func; + const ITensor *_input; + const ITensor *_bias; + ITensor *_output; + int _result_fixedpoint_multiplier; + int _result_shift; + int _result_offset_after_shift; + int _min; + int _max; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp index 1e8aa0cc0a..cbb56da8c0 100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h new file mode 100644 index 0000000000..1a8de1c441 --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H +#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8 + * + * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8 value. 
+ * The following computations will be performed by the kernel: + * + * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier + * -# Add bias to final result if bias tensor is not a nullptr + * -# Round to nearest division by a power-of-two using result_shift + * -# Add offset to each result + * -# Clamp the value between the specified min and max bounds + * -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8. + * + */ +class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel"; + } + /** Constructor */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &&) = default; + /** Default destructor */ + ~NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. 
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data type supported: QASYMM8 + * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added + * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication + * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8 + * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8 + * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, + * Along with @p min, this value can be used to implement "rectified linear unit" activation functions + */ + void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel + * + * @param[in] input Input tensor. Data type supported: S32 + * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. + * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[in] output Output tensor. 
Data type supported: QASYMM8 + * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8 + * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, + * Along with @p min, this value can be used to implement "rectified linear unit" activation functions + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Template function to run the NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void run(const Window &window); + + /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel functions + * + * @param[in] window Region on which to execute the kernel. + */ + using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::*)(const Window &window); + + QuantizeDownFunctionPtr _func; + const ITensor *_input; + const ITensor *_bias; + ITensor *_output; + int _result_fixedpoint_multiplier; + int _result_shift; + int _result_offset_after_shift; + int _min; + int _max; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp index 566872f02c..db038e559e 100644 --- a/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/KernelDescriptors.h" diff --git a/src/core/NEON/kernels/NEGEMMLowpReductionKernel.h b/src/core/NEON/kernels/NEGEMMLowpReductionKernel.h new file mode 100644 index 0000000000..655658cb6c --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMLowpReductionKernel.h @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H +#define ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; +struct GEMMLowpReductionKernelInfo; + +/** Common interface for all NEON reduction kernels */ +class INEGEMMLowpReductionKernel : public INEKernel +{ +public: + /** Constructor */ + INEGEMMLowpReductionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + INEGEMMLowpReductionKernel(const INEGEMMLowpReductionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + INEGEMMLowpReductionKernel &operator=(const INEGEMMLowpReductionKernel &) = delete; + /** Allow instances of this class to be moved */ + INEGEMMLowpReductionKernel(INEGEMMLowpReductionKernel &&) = default; + /** Allow instances of this class to be moved */ + INEGEMMLowpReductionKernel &operator=(INEGEMMLowpReductionKernel &&) = default; + /** Default destructor */ + virtual ~INEGEMMLowpReductionKernel() = default; + + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL + * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k Number of matrix columns/rows depending on the type of reduction. + * - is_reshaped True if the matrix has been reshaped. + * - scalar Scalar value to multiply each reduced column/row by. + * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. 
+ */ + virtual void configure(const ITensor *input, ITensor *output, const GEMMLowpReductionKernelInfo &info) = 0; + +protected: + const ITensor *_input; + ITensor *_output; + int32_t _k; + int32_t _scalar; + bool _mul_by_scalar; +}; + +/** NEON kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A. + * + * @note This stage is needed to handle the offset of matrix product + * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md + */ +class NEGEMMLowpMatrixAReductionKernel : public INEGEMMLowpReductionKernel +{ +public: + const char *name() const override + { + return "NEGEMMLowpMatrixAReductionKernel"; + } + /** Default constructor */ + NEGEMMLowpMatrixAReductionKernel() = default; + /** Prevent instances of this class from being copied */ + NEGEMMLowpMatrixAReductionKernel(const NEGEMMLowpMatrixAReductionKernel &) = delete; + /** Prevent instances of this class from being copied */ + NEGEMMLowpMatrixAReductionKernel &operator=(const NEGEMMLowpMatrixAReductionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMLowpMatrixAReductionKernel(NEGEMMLowpMatrixAReductionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMLowpMatrixAReductionKernel &operator=(NEGEMMLowpMatrixAReductionKernel &&) = default; + /** Default destructor */ + ~NEGEMMLowpMatrixAReductionKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL + * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k (num_mtx_a_cols) Number of matrix A columns + * - is_reshaped (is_interleaved4x4) True if the matrix A has been interleaved4x4 + * - scalar Scalar value to multiply each reduced row by. + * - mul_byscalar True if each reduced column must be multiplied by a scalar value. 
+ */ + void configure(const ITensor *mtx_a, ITensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override; + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixAReductionKernel + * + * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL + * @param[in] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k (num_mtx_a_cols) Number of matrix A columns + * - is_reshaped (is_interleaved4x4) True if the matrix A has been interleaved4x4 + * - scalar Scalar value to multiply each reduced row by. + * - mul_byscalar True if each reduced column must be multiplied by a scalar value. + * + * @return a status + */ + static Status validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row, const GEMMLowpReductionKernelInfo &info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Execution of the reduction kernel specialized on the input type + * + * @param[in] window Execution window + */ + template + void run_internal(const Window &window); +}; + +/** NEON kernel used to compute the row-vectors of sums of all the entries in each column of Matrix B. 
+ * + * @note This stage is needed to handle the offset of matrix product + * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md + */ +class NEGEMMLowpMatrixBReductionKernel : public INEGEMMLowpReductionKernel +{ +public: + const char *name() const override + { + return "NEGEMMLowpMatrixBReductionKernel"; + } + /** Default constructor */ + NEGEMMLowpMatrixBReductionKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpMatrixBReductionKernel(const NEGEMMLowpMatrixBReductionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpMatrixBReductionKernel &operator=(const NEGEMMLowpMatrixBReductionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMLowpMatrixBReductionKernel(NEGEMMLowpMatrixBReductionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMLowpMatrixBReductionKernel &operator=(NEGEMMLowpMatrixBReductionKernel &&) = default; + /** Default destructor */ + ~NEGEMMLowpMatrixBReductionKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] mtx_b Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL + * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k (num_mtx_b_rows) Number of matrix B rows. + * - is_reshaped (is_transposed1xW) True if the input tensor is transposed 1xW. + * - scalar Scalar value to multiply each reduced row by. + * - mul_byscalar True if each reduced row must be multiplied by a scalar value. 
+ */ + void configure(const ITensor *mtx_b, ITensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override; + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixBReductionKernel + * + * @param[in] mtx_b Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL + * @param[in] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 + * @param[in] info Kernel metadata: + * - k (num_mtx_b_rows) Number of matrix B rows. + * - is_reshaped (is_transposed1xW) True if the input tensor is transposed 1xW. + * - scalar Scalar value to multiply each reduced row by. + * - mul_byscalar True if each reduced row must be multiplied by a scalar value. + * + * @return a status + */ + static Status validate(const ITensorInfo *mtx_b, const ITensorInfo *vector_sum_col, const GEMMLowpReductionKernelInfo &info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Execution of the reduction kernel specialized on the input type + * + * @param[in] window Execution window + * @param[in] info Thread-related information + */ + template + void run_internal(const Window &window, const ThreadInfo &info); +}; +} // namespace arm_compute + +#endif /* ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp index 9aee26ca55..6a2802a991 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h new file mode 100644 index 0000000000..48377838d2 --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H +#define ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform the in-place matrix addition between 2 matrices taking into account that the second matrix might be weighted by a scalar value beta: + * + * @note [ MTX_OUT = MTX_0 + beta * MTX_1 ] with MTX_0 and MTX_1 of the same size + * + * @note This stage is used to finalize the GEMM result and it is computed if and only if beta != 0.0. In case this kernel is used for finalizing GEMM result, we have: + * - MTX_0 = A * B * alpha, where MTX_0 is the output of @ref NEGEMMMatrixMultiplyKernel + * - MTX_1 = C + */ +class NEGEMMMatrixAdditionKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEGEMMMatrixAdditionKernel"; + } + /** Constructor */ + NEGEMMMatrixAdditionKernel(); + /** Prevent instances of this class from being copied */ + NEGEMMMatrixAdditionKernel(const NEGEMMMatrixAdditionKernel &) = delete; + /** Prevent instances of this class from being copied */ + NEGEMMMatrixAdditionKernel &operator=(const NEGEMMMatrixAdditionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMMatrixAdditionKernel(NEGEMMMatrixAdditionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMMatrixAdditionKernel &operator=(NEGEMMMatrixAdditionKernel &&) = default; + /** Default destructor */ + ~NEGEMMMatrixAdditionKernel() = default; + /** Initialise the kernel's input and output. + * + * @note The input and output tensor must have the same dimensions + * + * @param[in] input Input tensor (Matrix C). Data types supported: F16/F32 + * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input. 
+ * @param[in] beta Weight of matrix C + */ + void configure(const ITensor *input, ITensor *output, float beta); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixAdditionKernel. + * + * @note The input and output tensor must have the same dimensions + * + * @param[in] input Input tensor info (Matrix C). Data types supported: F16/F32 + * @param[in] output Output tensor info. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input. + * @param[in] beta Weight of matrix C + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the matrix addition functions + * + * @param[in] input An input tensor. Data types supported: F16/F32 + * @param[out] output The output tensor. Data type supported: same as @p input + * @param[in] window Region on which to execute the kernel. + * @param[in] beta Weight of matrix C + */ + using MatrixAdditionFunction = void(const ITensor *input, ITensor *output, const Window &window, float beta); + /** Matrix addition function to use for the particular tensor types passed to configure() */ + MatrixAdditionFunction *_func; + float _beta; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp index a9236890e3..fc95c08f62 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h new file mode 100644 index 0000000000..1ea948de63 --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H +#define ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to multiply two input matrices "A" and "B". 
All elements of the output matrix/vector will be multiplied by alpha after the matrix multiplication + * + * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref NEGEMMInterleave4x4Kernel" and @ref NEGEMMTranspose1xWKernel + * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. The implementation also assumes that both tensors have not been reshaped + * + */ +class NEGEMMMatrixMultiplyKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEGEMMMatrixMultiplyKernel"; + } + /** Constructor */ + NEGEMMMatrixMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMMatrixMultiplyKernel(const NEGEMMMatrixMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMMatrixMultiplyKernel &operator=(const NEGEMMMatrixMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMMatrixMultiplyKernel(NEGEMMMatrixMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMMatrixMultiplyKernel &operator=(NEGEMMMatrixMultiplyKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @note If the output tensor is a matrix, the input matrices @p input0 and @p input1 should be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel + * These two kernels change the layout of the original matrices to be more cache-friendly. + * + * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32 + * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. 
+ * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. + * @param[in] alpha Weight of the matrix product + * @param[in] is_interleaved (Optional) True if input0 and input1 have been reshaped respectively using @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel + * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped + */ + void configure(const ITensor *input0, const ITensor *input1, ITensor *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixMultiplyKernel + * + * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32 + * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. + * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 + * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. + * @param[in] alpha Weight of the matrix product + * @param[in] is_interleaved (Optional) True if input0 and input1 have been reshaped respectively using @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel + * @param[in] reshape_info (Optional) GEMM reshape info. 
If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped + * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input0; + const ITensor *_input1; + ITensor *_output; + float _alpha; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H*/ diff --git a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp index b9b4fe9e9c..6d9f921b02 100644 --- a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp @@ -21,14 +21,14 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "src/core/AccessWindowStatic.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h new file mode 100644 index 0000000000..7120943a90 --- /dev/null +++ b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H +#define ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** NEON kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor) + * + * Following an example of how the transposition1xW works when the input data is F32 + * + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccccccccccc} + * a00 & a01 & a02 & a03 & a10 & a11 & a12 & a13 & a20 & a21 & a22 & a23 & a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * @f] + * + * Following an example of how the transposition1xW works when the input data type is F16 + * + * @f[ + * \left( \begin{array}{cccccccc} + * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 \\ + * a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 \\ + * a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 \\ + * a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc} + * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 & a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 & a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 & a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37\\ + * \end{array} \right) + * @f] + * + * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor) + * + */ +class NEGEMMTranspose1xWKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEGEMMTranspose1xWKernel"; + } + /** Constructor */ + NEGEMMTranspose1xWKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + 
NEGEMMTranspose1xWKernel(const NEGEMMTranspose1xWKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMTranspose1xWKernel &operator=(const NEGEMMTranspose1xWKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMTranspose1xWKernel(NEGEMMTranspose1xWKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMTranspose1xWKernel &operator=(NEGEMMTranspose1xWKernel &&) = default; + /** Default destructor */ + ~NEGEMMTranspose1xWKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor. Data type supported: same as @p input. + */ + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMTranspose1xWKernel + * + * @param[in] input Input tensor info. Data types supported: All + * @param[in] output Output tensor info. Data type supported: same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGatherKernel.cpp b/src/core/NEON/kernels/NEGatherKernel.cpp index 193fe98c7b..55ecb8840f 100644 --- a/src/core/NEON/kernels/NEGatherKernel.cpp +++ b/src/core/NEON/kernels/NEGatherKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEGatherKernel.h" +#include "src/core/NEON/kernels/NEGatherKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NEGatherKernel.h b/src/core/NEON/kernels/NEGatherKernel.h new file mode 100644 index 0000000000..d81e34c39c --- /dev/null +++ b/src/core/NEON/kernels/NEGatherKernel.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_NEGATHERKERNEL_H +#define ARM_COMPUTE_NEGATHERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Kernel to perform other operation on NEON */ +class NEGatherKernel : public INEKernel +{ +public: + /** Default constructor. 
*/ + NEGatherKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NEGatherKernel(const NEGatherKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NEGatherKernel &operator=(const NEGatherKernel &) = delete; + /** Allow instances of this class to be moved. */ + NEGatherKernel(NEGatherKernel &&) = default; + /** Allow instances of this class to be moved. */ + NEGatherKernel &operator=(NEGatherKernel &&) = default; + /** Default destructor */ + ~NEGatherKernel() = default; + + /** Name of the kernel + * + * @return Kernel name + */ + const char *name() const override + { + return "NEGatherKernel"; + } + /** Initialise the kernel's inputs and outputs + * + * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All + * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following type: U32/S32. Each value Must be in range [0, input.shape[@p axis]) + * @param[out] output Destination tensor. Data type supported: Same as @p input + * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 + */ + void configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis = 0); + /** Static function to check if given info will lead to a valid configuration of @ref NEGatherKernel + * + * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All + * @param[in] indices Indices tensor info. Supported tensor rank: up to 1. Must be one of the following type: U32/S32. Each value Must be in range [0, input.shape[@p axis]) + * @param[in] output Destination tensor info. Data type supported: Same as @p input + * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. 
Defaults to 0 + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Implementation of the gather operation for 0 axis. + * + * For gather on the 0 axis an element by element copy is performed. + * + * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window()) + * @param[in] info Info about executing thread and CPU. + */ + template + void gather_0_axis(const Window &window, const ThreadInfo &info); + + /** Implementation of the gather operation. + * + * For 1<=axis a row-wise copy is taking place. + * + * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window()) + * @param[in] info Info about executing thread and CPU. + */ + template + void gather_n_axis(const Window &window, const ThreadInfo &info); + + using kernel_ptr = void (NEGatherKernel::*)(const Window &window, const ThreadInfo &info); + + const ITensor *_input; + const ITensor *_indices; + int _axis; + ITensor *_output; + kernel_ptr _func; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEGATHERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp b/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp index 5ff5db7266..63b26ab7c0 100644 --- a/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp @@ -21,13 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h" +#include "src/core/NEON/kernels/NEGaussian3x3Kernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEGaussian3x3Kernel.h b/src/core/NEON/kernels/NEGaussian3x3Kernel.h new file mode 100644 index 0000000000..8973b48e7a --- /dev/null +++ b/src/core/NEON/kernels/NEGaussian3x3Kernel.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H +#define ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a Gaussian 3x3 filter */ +class NEGaussian3x3Kernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEGaussian3x3Kernel"; + } + /** Constructor */ + NEGaussian3x3Kernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussian3x3Kernel(const NEGaussian3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussian3x3Kernel &operator=(const NEGaussian3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + NEGaussian3x3Kernel(NEGaussian3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + NEGaussian3x3Kernel &operator=(NEGaussian3x3Kernel &&) = default; + /** Default destructor */ + ~NEGaussian3x3Kernel() = default; + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8 + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H */ diff --git a/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp b/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp index 5bb3e76ded..ab2feb0dc2 100644 --- a/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp +++ b/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp @@ -21,15 +21,15 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h" +#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -112,6 +112,10 @@ void NEGaussian5x5HorKernel::run(const Window &window, const ThreadInfo &info) input, output); } +NEGaussian5x5VertKernel::NEGaussian5x5VertKernel() +{ +} + BorderSize NEGaussian5x5VertKernel::border_size() const { return BorderSize{ 2, 0 }; diff --git a/src/core/NEON/kernels/NEGaussian5x5Kernel.h b/src/core/NEON/kernels/NEGaussian5x5Kernel.h new file mode 100644 index 0000000000..f4bca55637 --- /dev/null +++ b/src/core/NEON/kernels/NEGaussian5x5Kernel.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H +#define ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a Gaussian 5x5 filter (horizontal pass) */ +class NEGaussian5x5HorKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEGaussian5x5HorKernel"; + } + /** Default constructor */ + NEGaussian5x5HorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussian5x5HorKernel(NEGaussian5x5HorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussian5x5HorKernel &operator=(NEGaussian5x5HorKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGaussian5x5HorKernel(NEGaussian5x5HorKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGaussian5x5HorKernel &operator=(NEGaussian5x5HorKernel &&) = default; + /** Default destructor */ + ~NEGaussian5x5HorKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + BorderSize _border_size; +}; + +/** NEON kernel to perform a Gaussian 5x5 filter (vertical pass) */ +class NEGaussian5x5VertKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEGaussian5x5VertKernel"; + } + /** Default constructor */ + NEGaussian5x5VertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussian5x5VertKernel(NEGaussian5x5VertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussian5x5VertKernel &operator=(NEGaussian5x5VertKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGaussian5x5VertKernel(NEGaussian5x5VertKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGaussian5x5VertKernel &operator=(NEGaussian5x5VertKernel &&) = default; + /** Default destructor */ + ~NEGaussian5x5VertKernel() = default; + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data type supported: S16. + * @param[out] output Destination tensor, Data type supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H */ diff --git a/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp b/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp index 62cf414df2..49c8e9ec3e 100644 --- a/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp +++ b/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp @@ -21,17 +21,17 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h" +#include "src/core/NEON/kernels/NEGaussianPyramidKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEGaussianPyramidKernel.h b/src/core/NEON/kernels/NEGaussianPyramidKernel.h new file mode 100644 index 0000000000..e852db2699 --- /dev/null +++ b/src/core/NEON/kernels/NEGaussianPyramidKernel.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H +#define ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a GaussianPyramid (horizontal pass) */ +class NEGaussianPyramidHorKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEGaussianPyramidHorKernel"; + } + /** Default constructor */ + NEGaussianPyramidHorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &&) = default; + /** Default destructor */ + ~NEGaussianPyramidHorKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Output should have half the input width. Data type supported: S16. 
+ */ + void configure(const ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + int _l2_load_offset; +}; + +/** NEON kernel to perform a GaussianPyramid (vertical pass) */ +class NEGaussianPyramidVertKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEGaussianPyramidVertKernel"; + } + /** Default constructor */ + NEGaussianPyramidVertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &&) = default; + /** Default destructor */ + ~NEGaussianPyramidVertKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data type supported: S16. + * @param[out] output Destination tensor. Output should have half the input height. Data type supported: U8. 
+ */ + void configure(const ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + int _t2_load_offset; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp index 483f204b04..516a9b68c2 100644 --- a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp +++ b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" +#include "src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" diff --git a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h new file mode 100644 index 0000000000..f6d39e50a7 --- /dev/null +++ b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H +#define ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" +namespace arm_compute +{ +class ITensor; + +/** Interface for Compute All Anchors kernel */ +class NEComputeAllAnchorsKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEComputeAllAnchorsKernel"; + } + + /** Default constructor */ + NEComputeAllAnchorsKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEComputeAllAnchorsKernel(const NEComputeAllAnchorsKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEComputeAllAnchorsKernel &operator=(const NEComputeAllAnchorsKernel &) = delete; + /** Allow instances of this class to be moved */ + NEComputeAllAnchorsKernel(NEComputeAllAnchorsKernel &&) = default; + /** Allow instances of this class to be moved */ + NEComputeAllAnchorsKernel &operator=(NEComputeAllAnchorsKernel &&) = default; + /** Default destructor */ + ~NEComputeAllAnchorsKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 + * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. 
Data types supported: Same as @p anchors + * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo + * + */ + void configure(const ITensor *anchors, ITensor *all_anchors, const ComputeAnchorsInfo &info); + + /** Static function to check if given info will lead to a valid configuration of @ref NEComputeAllAnchorsKernel + * + * @param[in] anchors Source tensor info. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 + * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p anchors + * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo + * + * @return a Status + */ + static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + template + void internal_run(const Window &window); + + const ITensor *_anchors; + ITensor *_all_anchors; + ComputeAnchorsInfo _anchors_info; +}; +} // namespace arm_compute +#endif // ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H diff --git a/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp b/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp index 00f4087cbc..089cd34e0c 100644 --- a/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp +++ b/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ -#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" +#include "src/core/NEON/kernels/NEHOGDescriptorKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/HOGInfo.h" diff --git a/src/core/NEON/kernels/NEHOGDescriptorKernel.h b/src/core/NEON/kernels/NEHOGDescriptorKernel.h new file mode 100644 index 0000000000..7845bc2cdf --- /dev/null +++ b/src/core/NEON/kernels/NEHOGDescriptorKernel.h @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H +#define ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H + +#include "arm_compute/core/IHOG.h" +#include "arm_compute/core/Size2D.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform HOG Orientation Binning */ +class NEHOGOrientationBinningKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEHOGOrientationBinningKernel"; + } + /** Default constructor */ + NEHOGOrientationBinningKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGOrientationBinningKernel(const NEHOGOrientationBinningKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGOrientationBinningKernel &operator=(const NEHOGOrientationBinningKernel &) = delete; + /** Allow instances of this class to be moved */ + NEHOGOrientationBinningKernel(NEHOGOrientationBinningKernel &&) = default; + /** Allow instances of this class to be moved */ + NEHOGOrientationBinningKernel &operator=(NEHOGOrientationBinningKernel &&) = default; + /** Default destructor */ + ~NEHOGOrientationBinningKernel() = default; + + /** Initialise the kernel's inputs, output and HOG's metadata + * + * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16. + * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8 + * @param[out] output Output tensor which stores the local HOG for each cell. Data type supported: F32. 
Number of channels supported: equal to the number of histogram bins per cell + * @param[in] hog_info HOG's metadata + */ + void configure(const ITensor *input_magnitude, const ITensor *input_phase, ITensor *output, const HOGInfo *hog_info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialised block normalization functions + * + * @param[in] mag_row_ptr Pointer to the first row of the cell in the magnitude tensor + * @param[in] phase_row_ptr Pointer to the first row of the cell in the phase tensor + * @param[out] output_ptr Pointer to the output cell of hog space tensor + * @param[in] mag_stride Stride of the magnitude tensor + * @param[in] phase_stride Stride of the phase tensor + * @param[in] cell_width Width of the cell + * @param[in] cell_height Height of the cell + * @param[in] num_bins Number of bins for each cell + * @param[in] phase_scale Scale factor to apply to the phase in order to calculate the histogram index + */ + using OrientBinFunc = void(const int16_t *__restrict mag_row_ptr, const uint8_t *__restrict phase_row_ptr, float *__restrict output_ptr, size_t mag_stride, size_t phase_stride, size_t cell_width, + size_t cell_height, size_t num_bins, float phase_scale); + /** Orientation binning function to use for the particular cell width passed to configure() */ + OrientBinFunc *_func; + const ITensor *_input_magnitude; + const ITensor *_input_phase; + ITensor *_output; + size_t _cell_width; + size_t _cell_height; + size_t _num_bins; + float _phase_scale; +}; + +/** NEON kernel to perform HOG block normalization */ +class NEHOGBlockNormalizationKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEHOGBlockNormalizationKernel"; + } + /** Default constructor */ + NEHOGBlockNormalizationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + 
NEHOGBlockNormalizationKernel(const NEHOGBlockNormalizationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGBlockNormalizationKernel &operator=(const NEHOGBlockNormalizationKernel &) = delete; + /** Allow instances of this class to be moved */ + NEHOGBlockNormalizationKernel(NEHOGBlockNormalizationKernel &&) = default; + /** Allow instances of this class to be moved */ + NEHOGBlockNormalizationKernel &operator=(NEHOGBlockNormalizationKernel &&) = default; + /** Default destructor */ + ~NEHOGBlockNormalizationKernel() = default; + + /** Initialise the kernel's input, output and HOG's metadata + * + * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell + * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block + * @param[in] hog_info HOG's metadata + */ + void configure(const ITensor *input, ITensor *output, const HOGInfo *hog_info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialised block normalization functions + * + * @param[in] input_row_ptr Pointer to the first row of the block in the input hog space tensor + * @param[out] output_ptr Pointer to the output block of the hog normalized space + * @param[in] input_stride Stride of the input hog space tensor + * @param[in] num_cells_per_block_height Number of cells per block along the Y direction + * @param[in] num_bins_block_x Number of bins per block along the X direction + * @param[in] num_bins_block Number of total bins per block + * @param[in] l2_hyst_threshold Threshold to use for l2 hysteresis normalization + */ + using BlockNormFunc = void(const float *input_row_ptr, float *output_ptr, size_t 
input_stride, size_t num_cells_per_block_height, size_t num_bins_block_x, size_t num_bins_block, + float l2_hyst_threshold); + /** Block normalization function to use for the particular normalization type passed to configure() */ + BlockNormFunc *_func; + const ITensor *_input; + ITensor *_output; + Size2D _num_cells_per_block; + Size2D _num_cells_per_block_stride; + size_t _num_bins; + float _l2_hyst_threshold; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H */ diff --git a/src/core/NEON/kernels/NEHOGDetectorKernel.cpp b/src/core/NEON/kernels/NEHOGDetectorKernel.cpp index d5dfa4195d..cba1d5538a 100644 --- a/src/core/NEON/kernels/NEHOGDetectorKernel.cpp +++ b/src/core/NEON/kernels/NEHOGDetectorKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h" +#include "src/core/NEON/kernels/NEHOGDetectorKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/HOGInfo.h" diff --git a/src/core/NEON/kernels/NEHOGDetectorKernel.h b/src/core/NEON/kernels/NEHOGDetectorKernel.h new file mode 100644 index 0000000000..45c28099c8 --- /dev/null +++ b/src/core/NEON/kernels/NEHOGDetectorKernel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEHOGDETECTORKERNEL_H +#define ARM_COMPUTE_NEHOGDETECTORKERNEL_H + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/IHOG.h" +#include "src/core/NEON/INEKernel.h" +#include "support/Mutex.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform HOG detector kernel using linear SVM */ +class NEHOGDetectorKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEHOGDetectorKernel"; + } + /** Default constructor */ + NEHOGDetectorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGDetectorKernel(const NEHOGDetectorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGDetectorKernel &operator=(const NEHOGDetectorKernel &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHOGDetectorKernel(NEHOGDetectorKernel &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHOGDetectorKernel &operator=(NEHOGDetectorKernel &&) = delete; + /** Default destructor */ + ~NEHOGDetectorKernel() = default; + + /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect + * + * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref 
NEHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block + * @param[in] hog HOG data object used by @ref NEHOGOrientationBinningKernel and @ref NEHOGBlockNormalizationKernel + * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects + * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. + * It must be multiple of the hog->info()->block_stride() + * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane + * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to + */ + void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, uint16_t idx_class = 0); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + IDetectionWindowArray *_detection_windows; + const float *_hog_descriptor; + float _bias; + float _threshold; + uint16_t _idx_class; + size_t _num_bins_per_descriptor_x; + size_t _num_blocks_per_descriptor_y; + size_t _block_stride_width; + size_t _block_stride_height; + size_t _detection_window_width; + size_t _detection_window_height; + size_t _max_num_detection_windows; + arm_compute::Mutex _mutex; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEHOGDETECTORKERNEL_H */ diff --git a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp index be68b9c44b..4159e434b2 100644 --- a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp +++ b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" +#include "src/core/NEON/kernels/NEHarrisCornersKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NEHarrisCornersKernel.h b/src/core/NEON/kernels/NEHarrisCornersKernel.h new file mode 100644 index 0000000000..4b794107a2 --- /dev/null +++ b/src/core/NEON/kernels/NEHarrisCornersKernel.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEHARRISCORNERSKERNEL_H +#define ARM_COMPUTE_NEHARRISCORNERSKERNEL_H + +#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" +#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" +#include "arm_compute/core/IArray.h" +#include "src/core/NEON/INEKernel.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** Common interface for all Harris Score kernels */ +class INEHarrisScoreKernel : public INEKernel +{ +public: + /** Default constructor */ + INEHarrisScoreKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INEHarrisScoreKernel(const INEHarrisScoreKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INEHarrisScoreKernel &operator=(const INEHarrisScoreKernel &) = delete; + /** Allow instances of this class to be moved */ + INEHarrisScoreKernel(INEHarrisScoreKernel &&) = default; + /** Allow instances of this class to be moved */ + INEHarrisScoreKernel &operator=(INEHarrisScoreKernel &&) = default; + /** Default destructor */ + ~INEHarrisScoreKernel() = default; + +public: + /** Setup the kernel parameters + * + * @param[in] input1 Source image (gradient X). Data types supported: S16/S32 + * @param[in] input2 Source image (gradient Y). Data types supported: same as @p input1 + * @param[out] output Destination image (harris score). Data types supported: F32 + * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0) + * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). + * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + virtual void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) = 0; + +protected: + const IImage *_input1; /**< Source image - Gx component */ + const IImage *_input2; /**< Source image - Gy component */ + IImage *_output; /**< Source image - Harris score */ + float _sensitivity; /**< Sensitivity value */ + float _strength_thresh; /**< Threshold value */ + float _norm_factor; /**< Normalization factor */ + BorderSize _border_size; /**< Border size */ +}; + +/** Template NEON kernel to perform Harris Score. + * The implementation supports 3, 5, and 7 for the block_size + */ +template <int32_t block_size> +class NEHarrisScoreKernel : public INEHarrisScoreKernel +{ +public: + const char *name() const override + { + return "NEHarrisScoreKernel"; + } + /** Default constructor */ + NEHarrisScoreKernel(); + // Inherited methods overridden: + void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) override; + BorderSize border_size() const override; + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialised harris score functions */ + using HarrisScoreFunction = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride, + float norm_factor, float sensitivity, float strength_thresh); + /** Harris Score function to use for the particular image types passed to configure() */ + HarrisScoreFunction *_func; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEHARRISCORNERSKERNEL_H */ diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp index a50712598a..227013a014 100644 --- a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp +++ 
b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h new file mode 100644 index 0000000000..9d100ebff1 --- /dev/null +++ b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H +#define ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the height concatenate kernel. + * The input tensor will be concatenated into the output tensor. + */ +class NEHeightConcatenateLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEHeightConcatenateLayerKernel"; + } + /** Default constructor */ + NEHeightConcatenateLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHeightConcatenateLayerKernel(const NEHeightConcatenateLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHeightConcatenateLayerKernel &operator=(const NEHeightConcatenateLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEHeightConcatenateLayerKernel(NEHeightConcatenateLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEHeightConcatenateLayerKernel &operator=(NEHeightConcatenateLayerKernel &&) = default; + /** Default destructor */ + ~NEHeightConcatenateLayerKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor info. Data types supported: All + * @param[in] height_offset The starting offset on the Y axis for the output tensor. + * @param[in,out] output Output tensor info. Data types supported: Same as @p input. + * + */ + void configure(const ITensorInfo *input, unsigned int height_offset, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEHeightConcatenateLayerKernel + * + * @param[in] input Input tensor info. Data types supported: All + * @param[in] height_offset The starting offset on the Y axis for the output tensor. 
+ * @param[in] output Output tensor info. Data types supported: Same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + +private: + unsigned int _height_offset; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEHistogramKernel.cpp b/src/core/NEON/kernels/NEHistogramKernel.cpp index 12d1bb8e7e..eddc3b29ab 100644 --- a/src/core/NEON/kernels/NEHistogramKernel.cpp +++ b/src/core/NEON/kernels/NEHistogramKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h" +#include "src/core/NEON/kernels/NEHistogramKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEHistogramKernel.h b/src/core/NEON/kernels/NEHistogramKernel.h new file mode 100644 index 0000000000..e14519ce25 --- /dev/null +++ b/src/core/NEON/kernels/NEHistogramKernel.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEHISTOGRAMKERNEL_H +#define ARM_COMPUTE_NEHISTOGRAMKERNEL_H + +#include "src/core/NEON/INEKernel.h" +#include "support/Mutex.h" + +#include +#include + +namespace arm_compute +{ +class IDistribution1D; +class ITensor; +using IImage = ITensor; + +/** Interface for the histogram kernel */ +class NEHistogramKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEHistogramKernel"; + } + /** Default constructor */ + NEHistogramKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHistogramKernel(const NEHistogramKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHistogramKernel &operator=(const NEHistogramKernel &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHistogramKernel(NEHistogramKernel &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHistogramKernel &operator=(NEHistogramKernel &&) = delete; + /** Default destructor */ + ~NEHistogramKernel() = default; + + /** Set the input image and the distribution output. + * + * @param[in] input Source image. Data type supported: U8. + * @param[out] output Destination distribution. + * @param[in,out] local_hist Array that the threads use to save their local histograms. 
+ * Its size should be equal to (number_of_threads * num_bins), + * and the Window::thread_id() is used to determine the part of the array + * used by each thread. + * @param[out] window_lut LUT with pre-calculated possible window values. + * The size of the LUT should be equal to max_range_size and it will be filled + * during the configure stage, while it is re-used in every run, therefore can be + * safely shared among threads. + */ + void configure(const IImage *input, IDistribution1D *output, uint32_t *local_hist, uint32_t *window_lut); + /** Set the input image and the distribution output. + * + * @note Used for histogram of fixed size equal to 256 + * + * @param[in] input Source image. Data type supported: U8. + * @param[out] output Destination distribution which must be of 256 bins. + */ + void configure(const IImage *input, IDistribution1D *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Function to merge multiple partial histograms. + * + * @param[out] global_hist Pointer to the final histogram. + * @param[in] local_hist Pointer to the partial histograms. + * @param[in] bins Number of bins. + */ + void merge_histogram(uint32_t *global_hist, const uint32_t *local_hist, size_t bins); + /** Function to merge multiple minimum values of partial histograms. + * + * @param[out] global_min Pointer to the global min value. + * @param[in] local_min Local min value. + */ + void merge_min(uint8_t *global_min, const uint8_t &local_min); + /** Function to perform histogram on the given window + * + * @param[in] win Region on which to execute the kernel + * @param[in] info Info about the executing thread + */ + void histogram_U8(Window win, const ThreadInfo &info); + /** Function to perform histogram on the given window where histogram is + * of fixed size 256 without ranges and offsets. 
+ * + * @param[in] win Region on which to execute the kernel + * @param[in] info Info about the executing thread + */ + void histogram_fixed_U8(Window win, const ThreadInfo &info); + /** Pre-calculate the pixel windowing for every possible pixel + * + * Calculate (V - offset) * numBins / range where V is every possible pixel value. + * + * @note We currently support U8 image thus possible pixel values are between 0 and 255 + */ + void calculate_window_lut() const; + /** Common signature for all the specialised Histogram functions + * + * @param[in] window Region on which to execute the kernel. + */ + using HistogramFunctionPtr = void (NEHistogramKernel::*)(Window window, const ThreadInfo &info); + + HistogramFunctionPtr _func; ///< Histogram function to use for the particular image types passed to configure() + const IImage *_input; + IDistribution1D *_output; + uint32_t *_local_hist; + uint32_t *_window_lut; + arm_compute::Mutex _hist_mtx; + static constexpr unsigned int _max_range_size{ 256 }; ///< 256 possible pixel values as we handle only U8 images +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEHISTOGRAMKERNEL_H */ diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp index 915ea75431..93bfcc501a 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" +#include "src/core/NEON/kernels/NEIm2ColKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEIm2ColKernel.h b/src/core/NEON/kernels/NEIm2ColKernel.h new file mode 100644 index 0000000000..6c1c631d82 --- /dev/null +++ b/src/core/NEON/kernels/NEIm2ColKernel.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEIM2COLKERNEL_H +#define ARM_COMPUTE_NEIM2COLKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; +class Size2D; + +/** Interface for the im2col reshape kernel. + * + * Rearranges image blocks into columns. It is used to strip out each convolution block to a single column. + * It is used to transform a convolution to a plain matrix multiplication. 
+ * + * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have: + * + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccc} + * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\ + * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\ + * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\ + * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\ + * \end{array} \right) + * @f] + */ +class NEIm2ColKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEIm2ColKernel"; + } + /** Default constructor */ + NEIm2ColKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEIm2ColKernel(const NEIm2ColKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEIm2ColKernel &operator=(const NEIm2ColKernel &) = delete; + /** Allow instances of this class to be moved */ + NEIm2ColKernel(NEIm2ColKernel &&) = default; + /** Allow instances of this class to be moved */ + NEIm2ColKernel &operator=(NEIm2ColKernel &&) = default; + /** Default destructor */ + ~NEIm2ColKernel() = default; + + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32 + * Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false + * @param[out] output The output tensor. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
+ * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported + */ + void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, + bool has_bias, const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1); + /** Static function to check if given info will lead to a valid configuration of @ref NEIm2ColKernel + * + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32 + * Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false + * @param[in] output The output tensor. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, + bool has_bias, const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Template function to run im2col + * + * @param[in] window Region on which to execute the kernel. 
(Must be a valid region of the window returned by window()). + */ + template + void run_im2col(const Window &window); + + /** Common signature for all the specialised im2col functions + * + * @param[in] window Region on which to execute the kernel. + */ + using Im2ColFunctionPtr = void (NEIm2ColKernel::*)(const Window &window); + + Im2ColFunctionPtr _func; + const ITensor *_input; + ITensor *_output; + std::pair _convolved_dims; + PadStrideInfo _conv_info; + unsigned int _kernel_width; + unsigned int _kernel_height; + bool _has_bias; + Size2D _dilation; + DataLayout _data_layout; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEIM2COLKERNEL_H */ diff --git a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp index 7aa23de6eb..08bf6f0e76 100644 --- a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h" +#include "src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h new file mode 100644 index 0000000000..96c0119719 --- /dev/null +++ b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H +#define ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; +struct InstanceNormalizationLayerKernelInfo; + +/** Interface for performing an instance normalization */ +class NEInstanceNormalizationLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEInstanceNormalizationLayerKernel"; + } + /** Default constructor */ + NEInstanceNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEInstanceNormalizationLayerKernel(const NEInstanceNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEInstanceNormalizationLayerKernel &operator=(const NEInstanceNormalizationLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEInstanceNormalizationLayerKernel(NEInstanceNormalizationLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEInstanceNormalizationLayerKernel &operator=(NEInstanceNormalizationLayerKernel &&) = default; + /** Default destructor */ + ~NEInstanceNormalizationLayerKernel() = default; + /** Set the input and output tensors. + * + * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW + * In case of @p output tensor = nullptr this tensor will store the result of the normalization. + * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. + * @param[in] info Kernel meta-data descriptor + */ + void configure(ITensor *input, ITensor *output, const InstanceNormalizationLayerKernelInfo &info); + + /** Static function to check if given info will lead to a valid configuration of @ref NEInstanceNormalizationLayer. + * + * @param[in] input Source tensor info. Data types supported: F16/F32. 
Data layout supported: NCHW + * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. + * @param[in] info Kernel meta-data descriptor + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialized instance normalization functions + * + * @param[in, out] input An input tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization. + * @param[out] output The output tensor. + * @param[in] gamma The scale scalar value applied to the normalized tensor. Defaults to 1.0 + * @param[in] beta The offset scalar value applied to the normalized tensor. Defaults to 0.0 + * @param[in] epsilon Lower bound value for the normalization. Defaults to 1e-12 + */ + using NormalizationFunction = void(ITensor *input, ITensor *output, float gamma, float beta, float epsilon, const Window &window); + + NormalizationFunction *_func; + ITensor *_input; + ITensor *_output; + float _gamma; + float _beta; + float _epsilon; + bool _use_mixed_precision{ true }; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEIntegralImageKernel.cpp b/src/core/NEON/kernels/NEIntegralImageKernel.cpp index 5fc6ca65e3..6ee97eea30 100644 --- a/src/core/NEON/kernels/NEIntegralImageKernel.cpp +++ b/src/core/NEON/kernels/NEIntegralImageKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEIntegralImageKernel.h" +#include "src/core/NEON/kernels/NEIntegralImageKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEIntegralImageKernel.h b/src/core/NEON/kernels/NEIntegralImageKernel.h new file mode 100644 index 0000000000..8d92504317 --- /dev/null +++ b/src/core/NEON/kernels/NEIntegralImageKernel.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H +#define ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Kernel to perform an image integral on an image */ +class NEIntegralImageKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEIntegralImageKernel"; + } + /** Default constructor */ + NEIntegralImageKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEIntegralImageKernel(const NEIntegralImageKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEIntegralImageKernel &operator=(const NEIntegralImageKernel &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEIntegralImageKernel(NEIntegralImageKernel &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEIntegralImageKernel &operator=(NEIntegralImageKernel &&) = delete; + /** Default destructor */ + ~NEIntegralImageKernel() = default; + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8 + * @param[out] output Destination tensor. 
Data type supported: U32 + */ + void configure(const ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + bool is_parallelisable() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp index a216981f0f..dae5b57fec 100644 --- a/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp +++ b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h" +#include "src/core/NEON/kernels/NEL2NormalizeLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEL2NormalizeLayerKernel.h b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.h new file mode 100644 index 0000000000..af3ad3403e --- /dev/null +++ b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H +#define ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for performing a L2 normalize on a given axis given the square sum of it in this axis */ +class NEL2NormalizeLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEL2NormalizeLayerKernel"; + } + /** Default constructor */ + NEL2NormalizeLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEL2NormalizeLayerKernel(const NEL2NormalizeLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEL2NormalizeLayerKernel &operator=(const NEL2NormalizeLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEL2NormalizeLayerKernel(NEL2NormalizeLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEL2NormalizeLayerKernel &operator=(NEL2NormalizeLayerKernel &&) = default; + /** Default destructor */ + ~NEL2NormalizeLayerKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16/F32. + * @param[in] sum Sum values tensor. Data types supported: same as @p input. + * Sum will have the same number of dimensions as input. + * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. 
+ * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 + * @param[in] epsilon Lower bound value for the normalization. + */ + void configure(const ITensor *input, const ITensor *sum, ITensor *output, int axis, float epsilon); + + /** Static function to check if given info will lead to a valid configuration of @ref NEL2NormalizeLayerKernel. + * + * @param[in] input Source tensor info. Data types supported: F16/F32. + * @param[in] sum Sum values tensor info. Data types supported: same as @p input. + * Sum will have the same number of dimensions as input. + * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 + * @param[in] epsilon Lower bound value for the normalization. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + const ITensor *_sum; + ITensor *_output; + unsigned int _actual_axis; + float _epsilon; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NELKTrackerKernel.cpp b/src/core/NEON/kernels/NELKTrackerKernel.cpp index 6567a8d206..442f001102 100644 --- a/src/core/NEON/kernels/NELKTrackerKernel.cpp +++ b/src/core/NEON/kernels/NELKTrackerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" +#include "src/core/NEON/kernels/NELKTrackerKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NELKTrackerKernel.h b/src/core/NEON/kernels/NELKTrackerKernel.h new file mode 100644 index 0000000000..c24166c042 --- /dev/null +++ b/src/core/NEON/kernels/NELKTrackerKernel.h @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_LKTRACKERKERNEL_H +#define ARM_COMPUTE_LKTRACKERKERNEL_H + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +#include <cstddef> +#include <cstdint> +#include <tuple> +#include <utility> + +namespace arm_compute +{ +class ITensor; + +/** Interface for NEON Array of Internal Key Points. 
*/ +using INELKInternalKeypointArray = IArray<NELKInternalKeypoint>; + +/** Interface for the Lucas-Kanade tracker kernel */ +class NELKTrackerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NELKTrackerKernel"; + } + /** Default constructor */ + NELKTrackerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELKTrackerKernel(const NELKTrackerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELKTrackerKernel &operator=(const NELKTrackerKernel &) = delete; + /** Allow instances of this class to be moved */ + NELKTrackerKernel(NELKTrackerKernel &&) = default; + /** Allow instances of this class to be moved */ + NELKTrackerKernel &operator=(NELKTrackerKernel &&) = default; + /** Default destructor */ + ~NELKTrackerKernel() = default; + + /** Initialise the kernel input and output + * + * @param[in] input_old Pointer to the input old tensor. Data type supported: U8 + * @param[in] input_new Pointer to the input new tensor. Data type supported. U8 + * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data type supported: S16 + * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. Data type supported: S16 + * @param[in] old_points Pointer to the IKeyPointArray storing old key points + * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points + * @param[out] new_points Pointer to the IKeyPointArray storing new key points + * @param[in, out] old_points_internal Pointer to the array of NELKInternalKeypoint for old points + * @param[out] new_points_internal Pointer to the array of NELKInternalKeypoint for new points + * @param[in] termination The criteria to terminate the search of each keypoint. 
+ * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used + * @param[in] epsilon The error for terminating the algorithm + * @param[in] num_iterations The maximum number of iterations before terminate the algorithm + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] level The pyramid level + * @param[in] num_levels The number of pyramid levels + * @param[in] pyramid_scale Scale factor used for generating the pyramid + */ + void configure(const ITensor *input_old, const ITensor *input_new, const ITensor *old_scharr_gx, const ITensor *old_scharr_gy, + const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates, IKeyPointArray *new_points, + INELKInternalKeypointArray *old_points_internal, INELKInternalKeypointArray *new_points_internal, + Termination termination, bool use_initial_estimate, float epsilon, unsigned int num_iterations, size_t window_dimension, + size_t level, size_t num_levels, float pyramid_scale); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + /** Initialise the array of keypoints in the provide range + * + * @param[in] start Index of first element in the keypoints array to be initialised + * @param[in] end Index after last elelemnt in the keypoints array to be initialised + */ + void init_keypoints(int start, int end); + /** Compute the structure tensor A^T * A based on the scharr gradients I_x and I_y + * + * @param[in] keypoint Keypoint for which gradients are computed + * @param[out] bilinear_ix Intermediate interpolated data for X gradient + * @param[out] bilinear_iy Intermediate interpolated data for Y gradient + * + * @return Values A11, A12, A22 + */ + std::tuple<float, float, float> compute_spatial_gradient_matrix(const NELKInternalKeypoint &keypoint, int32_t *bilinear_ix, int32_t *bilinear_iy); + /** Compute the vector A^T * b, 
i.e. -sum(I_d * I_t) for d in {x,y} + * + * @param[in] old_keypoint Old keypoint for which gradient is computed + * @param[in] new_keypoint New keypoint for which gradient is computed + * @param[in] bilinear_ix Intermediate interpolated data for X gradient + * @param[in] bilinear_iy Intermediate interpolated data for Y gradient + * + * @return Values b1, b2 + */ + std::pair<float, float> compute_image_mismatch_vector(const NELKInternalKeypoint &old_keypoint, const NELKInternalKeypoint &new_keypoint, const int32_t *bilinear_ix, const int32_t *bilinear_iy); + + const ITensor *_input_old; + const ITensor *_input_new; + const ITensor *_old_scharr_gx; + const ITensor *_old_scharr_gy; + IKeyPointArray *_new_points; + const IKeyPointArray *_new_points_estimates; + const IKeyPointArray *_old_points; + INELKInternalKeypointArray *_old_points_internal; + INELKInternalKeypointArray *_new_points_internal; + Termination _termination; + bool _use_initial_estimate; + float _pyramid_scale; + float _epsilon; + unsigned int _num_iterations; + int _window_dimension; + unsigned int _level; + unsigned int _num_levels; + ValidRegion _valid_region; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NELKTRACKERKERNEL_H */ diff --git a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp index b8e6a6d763..f11694dee4 100644 --- a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h new file mode 100644 index 0000000000..72093b4bb7 --- /dev/null +++ b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H +#define ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to multiply each row of first tensor with low 2 dimensions of second tensor. */ +class NELocallyConnectedMatrixMultiplyKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NELocallyConnectedMatrixMultiplyKernel"; + } + /** Default constructor */ + NELocallyConnectedMatrixMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELocallyConnectedMatrixMultiplyKernel(const NELocallyConnectedMatrixMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELocallyConnectedMatrixMultiplyKernel &operator=(const NELocallyConnectedMatrixMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + NELocallyConnectedMatrixMultiplyKernel(NELocallyConnectedMatrixMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + NELocallyConnectedMatrixMultiplyKernel &operator=(NELocallyConnectedMatrixMultiplyKernel &&) = default; + /** Default destructor */ + ~NELocallyConnectedMatrixMultiplyKernel() = default; + /** Initialise the kernel's input and output + * + * @param[in] input0 First input tensor. Data types supported: F16, F32 + * @param[in] input1 Second input tensor containing the Matrix B. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + */ + void configure(const ITensor *input0, const ITensor *input1, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NELocallyConnectedMatrixMultiplyKernel + * + * @param[in] input0 First input tensor info. 
Data types supported: F16, F32 + * @param[in] input1 Second input tensor info. Data type supported: same as @p input0 + * @param[in] output Output tensor info. Data type supported: same as @p input0 + * + * @return a status + */ + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input0; + const ITensor *_input1; + ITensor *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H */ diff --git a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp index 8d82e1abd6..205f67823d 100644 --- a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp +++ b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h" +#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEMagnitudePhaseKernel.h b/src/core/NEON/kernels/NEMagnitudePhaseKernel.h new file mode 100644 index 0000000000..3803d05ce9 --- /dev/null +++ b/src/core/NEON/kernels/NEMagnitudePhaseKernel.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H +#define ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Template interface for the kernel to compute magnitude and phase */ +template <MagnitudeType mag_type, PhaseType phase_type> +class NEMagnitudePhaseKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEMagnitudePhaseKernel"; + } + /** Default constructor */ + NEMagnitudePhaseKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMagnitudePhaseKernel(const NEMagnitudePhaseKernel &) = delete; + /** Default move constructor */ + NEMagnitudePhaseKernel(NEMagnitudePhaseKernel &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMagnitudePhaseKernel &operator=(const NEMagnitudePhaseKernel &) = delete; + /** Default move assignment operator */ + NEMagnitudePhaseKernel &operator=(NEMagnitudePhaseKernel &&) = default; + /** Destructor */ + ~NEMagnitudePhaseKernel() = default; + + /** Initialise the kernel's input, output. + * + * @note At least one of out1 or out2 must be set + * + * @param[in] gx Gradient X tensor. Data type supported: S16. + * @param[in] gy Gradient Y tensor. Data type supported: S16. + * @param[out] magnitude (Optional) The output tensor - Magnitude. Data type supported: S16. + * @param[out] phase (Optional) The output tensor - Phase. Data type supported: U8. 
+ */ + void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Function to perform magnitude on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void magnitude(const Window &window); + /** Function to perform phase on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void phase(const Window &window); + /** Function to perform magnitude and phase on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void magnitude_phase(const Window &window); + +private: + /** Common signature for all the specialised MagnitudePhase functions + * + * @param[in] window Region on which to execute the kernel. + */ + using MagnitudePhaseFunctionPtr = void (NEMagnitudePhaseKernel::*)(const Window &window); + /** MagnitudePhase function to use for the particular formats passed to configure() */ + MagnitudePhaseFunctionPtr _func; + const ITensor *_gx; /**< Input gradient X */ + const ITensor *_gy; /**< Input gradient Y */ + ITensor *_magnitude; /**< Output - Magnitude */ + ITensor *_phase; /**< Output - Phase */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp b/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp index 87caf00477..761fa15238 100644 --- a/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h" +#include "src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" diff --git a/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h b/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h new file mode 100644 index 0000000000..8cdfe2b953 --- /dev/null +++ b/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEMAXUNPOOLINGLAYERKERNEL_H +#define ARM_COMPUTE_NEMAXUNPOOLINGLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the pooling layer kernel */ +class NEMaxUnpoolingLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEMaxUnpoolingLayerKernel"; + } + /** Default constructor */ + NEMaxUnpoolingLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMaxUnpoolingLayerKernel(const NEMaxUnpoolingLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMaxUnpoolingLayerKernel &operator=(const NEMaxUnpoolingLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEMaxUnpoolingLayerKernel(NEMaxUnpoolingLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEMaxUnpoolingLayerKernel &operator=(NEMaxUnpoolingLayerKernel &&) = default; + /** Default destructor */ + ~NEMaxUnpoolingLayerKernel() = default; + /** Set the input and output tensors. + * + * @note Output shape must be equal to the shape of the original input to pool. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] indices Tensor containing the offset to store the input elements in the output tensor. + * @ref NEPoolingLayerKernel with indices should precede this function in order to + * properly reconstruct the output tensor. + * The tensor shape of this tensor has to be equal to the input tensor shape. Data type supported: U32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. 
+ */ + void configure(const ITensor *input, const ITensor *indices, ITensor *output, const PoolingLayerInfo &pool_info); + /** Static function to check if given info will lead to a valid configuration of @ref NEMaxUnpoolingLayerKernel + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] output Destination tensor info. Data types supported: Same as @p input. + * @param[in] indices Tensor info of the indices of the maximal values. Data type supported: U32. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Function to perform 2x2 pooling and compute the pooling indices. The indices can be used for max unpool. + * + * @param[in] window_input Input region on which to execute the kernel. + */ + template <typename T> + void unpooling2(const Window &window_input); + + using UnpoolingFunction = void (NEMaxUnpoolingLayerKernel::*)(const Window &window); + +private: + UnpoolingFunction _func; + const ITensor *_input; + ITensor *_output; + const ITensor *_indices; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEMAXUNPOOLINGLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp index c4e036a8b9..a6bb9f2ef7 100644 --- a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp +++ b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h" +#include "src/core/NEON/kernels/NEMeanStdDevKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEMeanStdDevKernel.h b/src/core/NEON/kernels/NEMeanStdDevKernel.h new file mode 100644 index 0000000000..e694f3824d --- /dev/null +++ b/src/core/NEON/kernels/NEMeanStdDevKernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEMEANSTDDEVKERNEL_H +#define ARM_COMPUTE_NEMEANSTDDEVKERNEL_H + +#include "src/core/NEON/INEKernel.h" +#include "support/Mutex.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** Interface for the kernel to calculate mean and standard deviation of input image pixels. 
*/ +class NEMeanStdDevKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEMeanStdDevKernel"; + } + /** Default constructor */ + NEMeanStdDevKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDevKernel(const NEMeanStdDevKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDevKernel &operator=(const NEMeanStdDevKernel &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMeanStdDevKernel(NEMeanStdDevKernel &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMeanStdDevKernel &operator=(NEMeanStdDevKernel &&) = delete; + /** Default destructor */ + ~NEMeanStdDevKernel() = default; + + /** Initialise the kernel's input and outputs. + * + * @param[in] input Input image. Data type supported: U8. + * @param[out] mean Input average pixel value. + * @param[out] global_sum Keeps global sum of pixel values. + * @param[out] stddev (Optional) Output standard deviation of pixel values. + * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values. 
+ */ + void configure(const IImage *input, float *mean, uint64_t *global_sum, float *stddev = nullptr, uint64_t *global_sum_squared = nullptr); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + + BorderSize border_size() const override; + +private: + const IImage *_input; + float *_mean; + float *_stddev; + uint64_t *_global_sum; + uint64_t *_global_sum_squared; + arm_compute::Mutex _mtx; + BorderSize _border_size; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEMEANSTDDEVKERNEL_H */ diff --git a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp index 8ee9ff6f40..6a41e3a161 100644 --- a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h" +#include "src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h new file mode 100644 index 0000000000..59d073ada5 --- /dev/null +++ b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H +#define ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#include <arm_neon.h> +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to normalize the input 2D tensor across the first dimension with respect to mean and standard deviation of the same dimension. 
*/ +class NEMeanStdDevNormalizationKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEMeanStdDevNormalizationKernel"; + } + /** Default constructor */ + NEMeanStdDevNormalizationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDevNormalizationKernel(const NEMeanStdDevNormalizationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDevNormalizationKernel &operator=(const NEMeanStdDevNormalizationKernel &) = delete; + /** Allow instances of this class to be moved */ + NEMeanStdDevNormalizationKernel(NEMeanStdDevNormalizationKernel &&) = default; + /** Allow instances of this class to be moved */ + NEMeanStdDevNormalizationKernel &operator=(NEMeanStdDevNormalizationKernel &&) = default; + /** Default destructor */ + ~NEMeanStdDevNormalizationKernel() = default; + /** Initialise the kernel's input and outputs. + * + * @note If the output tensor is a nullptr, the normalization will be performed in-place. + * + * @param[in, out] input Source tensor with 2 dimensions. In case of @p output tensor = nullptr, + * this tensor will store the result of the normalization. Data types supported: F16/F32. + * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input + * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. + */ + void configure(ITensor *input, ITensor *output = nullptr, float epsilon = 1e-8f); + /** Static function to check if given info will lead to a valid configuration of @ref NEMeanStdDevNormalizationKernel + * + * @param[in] input Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr, + * this tensor will store the result of the normalization. Data types supported: F16/F32. 
+ * @param[in] output (Optional) Destination tensor info. It can be nullptr in case of in-place computation. Data type supported: same as @p input + * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output = nullptr, float epsilon = 1e-8f); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Normalizes the input with respect to mean and standard deviation. + * + * @param[in] window Region on which to execute the kernel. + */ + template + void mean_stddev_normalization(const Window &window); + + ITensor *_input; + ITensor *_output; + float _epsilon; + + using MeanStdDevNormFunction = void (NEMeanStdDevNormalizationKernel::*)(const Window &window); + + MeanStdDevNormFunction _func; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NEMedian3x3Kernel.cpp b/src/core/NEON/kernels/NEMedian3x3Kernel.cpp index 86fcc30e91..0160edc650 100644 --- a/src/core/NEON/kernels/NEMedian3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEMedian3x3Kernel.cpp @@ -21,14 +21,14 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h" +#include "src/core/NEON/kernels/NEMedian3x3Kernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEMedian3x3Kernel.h b/src/core/NEON/kernels/NEMedian3x3Kernel.h new file mode 100644 index 0000000000..b9e28b3053 --- /dev/null +++ b/src/core/NEON/kernels/NEMedian3x3Kernel.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEMEDIAN3x3KERNEL_H +#define ARM_COMPUTE_NEMEDIAN3x3KERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Kernel to perform a median filter on a tensor */ +class NEMedian3x3Kernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NEMedian3x3Kernel"; + } + /** Default constructor */ + NEMedian3x3Kernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMedian3x3Kernel(const NEMedian3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMedian3x3Kernel &operator=(const NEMedian3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + NEMedian3x3Kernel(NEMedian3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + NEMedian3x3Kernel &operator=(NEMedian3x3Kernel &&) = default; + /** Default destructor */ + ~NEMedian3x3Kernel() = default; + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8 + * @param[out] output Destination tensor. Data type supported: U8 + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEMEDIAN3x3KERNEL_H */ diff --git a/src/core/NEON/kernels/NEMemsetKernel.cpp b/src/core/NEON/kernels/NEMemsetKernel.cpp index fd427cc8c5..a8dfda3775 100644 --- a/src/core/NEON/kernels/NEMemsetKernel.cpp +++ b/src/core/NEON/kernels/NEMemsetKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" +#include "src/core/NEON/kernels/NEMemsetKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEMemsetKernel.h b/src/core/NEON/kernels/NEMemsetKernel.h new file mode 100644 index 0000000000..a720e60251 --- /dev/null +++ b/src/core/NEON/kernels/NEMemsetKernel.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEMEMSETKERNEL_H +#define ARM_COMPUTE_NEMEMSETKERNEL_H + +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for filling the planes of a tensor */ +class NEMemsetKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEMemsetKernel"; + } + /** Default constructor */ + NEMemsetKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMemsetKernel(const NEMemsetKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMemsetKernel &operator=(const NEMemsetKernel &) = delete; + /** Allow instances of this class to be moved */ + NEMemsetKernel(NEMemsetKernel &&) = default; + /** Allow instances of this class to be moved */ + NEMemsetKernel &operator=(NEMemsetKernel &&) = default; + /** Default destructor */ + ~NEMemsetKernel() = default; + /** Initialise the kernel's tensor and filling value + * + * @param[in,out] tensor Input tensor to fill. Supported data types: All + * @param[in] constant_value The value used to fill the planes of the tensor + */ + void configure(ITensor *tensor, const PixelValue &constant_value); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + ITensor *_tensor; + PixelValue _constant_value; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEMEMSETKERNEL_H */ diff --git a/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp b/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp index f675c391ed..92f6b4a42e 100644 --- a/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp +++ b/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h" +#include "src/core/NEON/kernels/NEMinMaxLayerKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NEMinMaxLayerKernel.h b/src/core/NEON/kernels/NEMinMaxLayerKernel.h new file mode 100644 index 0000000000..b4852ad9f2 --- /dev/null +++ b/src/core/NEON/kernels/NEMinMaxLayerKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_NEMINMAXLAYERKERNEL_H +#define ARM_COMPUTE_NEMINMAXLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" +#include "support/Mutex.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform min max search on a 3D tensor.
*/ +class NEMinMaxLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEMinMaxLayerKernel"; + } + /** Default constructor */ + NEMinMaxLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxLayerKernel(const NEMinMaxLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxLayerKernel &operator=(const NEMinMaxLayerKernel &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMinMaxLayerKernel(NEMinMaxLayerKernel &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMinMaxLayerKernel &operator=(NEMinMaxLayerKernel &&) = delete; + /** Default destructor */ + ~NEMinMaxLayerKernel() = default; + + /** Initialise the kernel's input and outputs. + * + * @note output[0] = minimum + * @note output[1] = maximum + * + * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data type supported: F32. + * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum value for each 3D input tensor. + * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32 + */ + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLMinMaxLayerKernel + * + * @param[in] input Input tensor info. Data types supported: F32. + * @param[in] output Output tensor info with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor. + * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + /** Resets global minimum and maximum. */ + void reset(); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + void update_min_max(float *out_ptr, float min, float max); + const ITensor *_input; + ITensor *_output; + arm_compute::Mutex _mtx; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEMINMAXLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp b/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp index e1691dc8ff..402e6f1811 100644 --- a/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp +++ b/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h" +#include "src/core/NEON/kernels/NEMinMaxLocationKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NEMinMaxLocationKernel.h b/src/core/NEON/kernels/NEMinMaxLocationKernel.h new file mode 100644 index 0000000000..a24666096f --- /dev/null +++ b/src/core/NEON/kernels/NEMinMaxLocationKernel.h @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H +#define ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H + +#include "arm_compute/core/IArray.h" +#include "src/core/NEON/INEKernel.h" +#include "support/Mutex.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** Interface for the kernel to perform min max search on an image. */ +class NEMinMaxKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEMinMaxKernel"; + } + /** Default constructor */ + NEMinMaxKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxKernel(const NEMinMaxKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxKernel &operator=(const NEMinMaxKernel &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMinMaxKernel(NEMinMaxKernel &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMinMaxKernel &operator=(NEMinMaxKernel &&) = delete; + /** Default destructor */ + ~NEMinMaxKernel() = default; + + /** Initialise the kernel's input and outputs. + * + * @param[in] input Input Image. Data types supported: U8/S16/F32. + * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. + * @param[out] max Maximum value of image.
Data types supported: S32 if input type is U8/S16, F32 if input type is F32. + */ + void configure(const IImage *input, void *min, void *max); + /** Resets global minimum and maximum. */ + void reset(); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Performs the min/max algorithm on U8 images on a given window. + * + * @param win The window to run the algorithm on. + */ + void minmax_U8(Window win); + /** Performs the min/max algorithm on S16 images on a given window. + * + * @param win The window to run the algorithm on. + */ + void minmax_S16(Window win); + /** Performs the min/max algorithm on F32 images on a given window. + * + * @param win The window to run the algorithm on. + */ + void minmax_F32(Window win); + /** Common signature for all the specialised MinMax functions + * + * @param[in] window Region on which to execute the kernel. + */ + using MinMaxFunction = void (NEMinMaxKernel::*)(Window window); + /** MinMax function to use for the particular image types passed to configure() */ + MinMaxFunction _func; + /** Helper to update min/max values **/ + template + void update_min_max(T min, T max); + + const IImage *_input; /**< Input image. */ + void *_min; /**< Minimum value. */ + void *_max; /**< Maximum value. */ + arm_compute::Mutex _mtx; /**< Mutex used for result reduction. */ +}; + +/** Interface for the kernel to find min max locations of an image. 
*/ +class NEMinMaxLocationKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEMinMaxLocationKernel"; + } + /** Default constructor */ + NEMinMaxLocationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxLocationKernel(const NEMinMaxLocationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxLocationKernel &operator=(const NEMinMaxLocationKernel &) = delete; + /** Allow instances of this class to be moved */ + NEMinMaxLocationKernel(NEMinMaxLocationKernel &&) = default; + /** Allow instances of this class to be moved */ + NEMinMaxLocationKernel &operator=(NEMinMaxLocationKernel &&) = default; + /** Default destructor */ + ~NEMinMaxLocationKernel() = default; + + /** Initialise the kernel's input and outputs. + * + * @param[in] input Input Image. Data types supported: U8/S16/F32. + * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. + * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. + * @param[out] min_loc Array of minimum value locations. + * @param[out] max_loc Array of maximum value locations. + * @param[out] min_count Number of minimum value encounters. + * @param[out] max_count Number of maximum value encounters. + */ + void configure(const IImage *input, void *min, void *max, + ICoordinates2DArray *min_loc = nullptr, ICoordinates2DArray *max_loc = nullptr, + uint32_t *min_count = nullptr, uint32_t *max_count = nullptr); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + bool is_parallelisable() const override; + +private: + /** Performs the min/max location algorithm on T type images on a given window. + * + * @param win The window to run the algorithm on. 
+ */ + template + void minmax_loc(const Window &win); + /** Common signature for all the specialised MinMaxLoc functions + * + * @param[in] window Region on which to execute the kernel. + */ + using MinMaxLocFunction = void (NEMinMaxLocationKernel::*)(const Window &window); + /** MinMaxLoc function to use for the particular image types passed to configure() */ + MinMaxLocFunction _func; + /** Helper to create a function pointer table for the parameterized MinMaxLocation functions. */ + template + struct create_func_table; + + const IImage *_input; /**< Input image. */ + void *_min; /**< Minimum value. */ + void *_max; /**< Maximum value. */ + uint32_t *_min_count; /**< Count of minimum value encounters. */ + uint32_t *_max_count; /**< Count of maximum value encounters. */ + ICoordinates2DArray *_min_loc; /**< Locations of minimum values. */ + ICoordinates2DArray *_max_loc; /**< Locations of maximum values. */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NENonLinearFilterKernel.cpp b/src/core/NEON/kernels/NENonLinearFilterKernel.cpp index 31919ead03..58c0acd404 100644 --- a/src/core/NEON/kernels/NENonLinearFilterKernel.cpp +++ b/src/core/NEON/kernels/NENonLinearFilterKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h" +#include "src/core/NEON/kernels/NENonLinearFilterKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NENonLinearFilterKernel.h b/src/core/NEON/kernels/NENonLinearFilterKernel.h new file mode 100644 index 0000000000..3cef12e8ec --- /dev/null +++ b/src/core/NEON/kernels/NENonLinearFilterKernel.h @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NENONLINEARFILTERKERNEL_H +#define ARM_COMPUTE_NENONLINEARFILTERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to apply a non-linear filter */ +class NENonLinearFilterKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NENonLinearFilterKernel"; + } + /** Default constructor */ + NENonLinearFilterKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENonLinearFilterKernel(NENonLinearFilterKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &) = delete; + /** Allow instances of this class to be moved */ + NENonLinearFilterKernel(NENonLinearFilterKernel &&) = default; + /** Allow instances of this class to be moved */ + NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &&) = default; + /** Default destructor */ + ~NENonLinearFilterKernel() = default; + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8 + * @param[out] output Destination tensor. Data type supported: U8 + * @param[in] function Non linear function to perform + * @param[in] mask_size Mask size. Supported sizes: 3, 5 + * @param[in] pattern Mask pattern + * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */ + void configure(const ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + /** Fill mask with the corresponding given pattern. + * + * @param[in,out] mask Mask to be filled according to pattern + * @param[in] cols Columns (width) of mask + * @param[in] rows Rows (height) of mask + * @param[in] pattern Pattern to fill the mask according to + */ + void fill_mask(uint8_t *mask, int cols, int rows, MatrixPattern pattern); + /** Apply a median filter when given mask pattern is defined as box. + * + * @param[in] win Window to apply the filter on. + */ + template + void median_filter_box(const Window &win); + /** Apply a min filter when given mask pattern is defined as box. + * + * @param[in] win Window to apply the filter on. + */ + template + void min_filter_box(const Window &win); + /** Apply a max filter when given mask pattern is defined as box. + * + * @param[in] win Window to apply the filter on. + */ + template + void max_filter_box(const Window &win); + /** Apply a median filter when given mask pattern is defined as cross. + * + * @param[in] win Window to apply the filter on. + */ + template + void median_filter_cross(const Window &win); + /** Apply a min filter when given mask pattern is defined as cross. + * + * @param[in] win Window to apply the filter on. + */ + template + void min_filter_cross(const Window &win); + /** Apply a max filter when given mask pattern is defined as cross. + * + * @param[in] win Window to apply the filter on. + */ + template + void max_filter_cross(const Window &win); + /** Apply a median filter when given mask pattern is defined as disk. + * + * @param[in] win Window to apply the filter on. 
+ */ + template + void median_filter_disk(const Window &win); + /** Apply a min filter when given mask pattern is defined as disk. + * + * @param[in] win Window to apply the filter on. + */ + template + void min_filter_disk(const Window &win); + /** Apply a max filter when given mask pattern is defined as disk. + * + * @param[in] win Window to apply the filter on. + */ + template + void max_filter_disk(const Window &win); + /** Apply a non-linear filter when given mask has user-defined pattern. + * + * @param[in] win Window to apply the filter on. + */ + template + void non_linear_filter_generic(const Window &win); + +private: + unsigned int _border_width; + const ITensor *_input; + ITensor *_output; + const uint8_t *_mask; + MatrixPattern _pattern; + NonLinearFilterFunction _function; + unsigned int _func_idx; + BorderSize _border_size; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NENONLINEARFILTERKERNEL_H */ diff --git a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp index 9566ced768..9f5dfcdcdb 100644 --- a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp +++ b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" +#include "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h new file mode 100644 index 0000000000..d32dfecfeb --- /dev/null +++ b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H +#define ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H + +#include "src/core/NEON/INEKernel.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Interface to perform Non-Maxima suppression over a 3x3 window using NEON + * + * @note Used by @ref NEFastCorners and @ref NEHarrisCorners + */ +class NENonMaximaSuppression3x3Kernel : public INEKernel +{ +public: + const char *name() const override + { + return "NENonMaximaSuppression3x3Kernel"; + } + /** Default constructor */ + NENonMaximaSuppression3x3Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENonMaximaSuppression3x3Kernel(const NENonMaximaSuppression3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENonMaximaSuppression3x3Kernel &operator=(const NENonMaximaSuppression3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + NENonMaximaSuppression3x3Kernel(NENonMaximaSuppression3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + NENonMaximaSuppression3x3Kernel &operator=(NENonMaximaSuppression3x3Kernel &&) = default; + /** Default destructor */ + ~NENonMaximaSuppression3x3Kernel() = default; + + /** Initialise the kernel's sources, destinations and border mode. + * + * @param[in] input Source tensor. Data types supported: U8/F32 + * @param[out] output Destination tensor. Data types supported: same as @p input + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +protected: + /** Common signature for all the specialised non-maxima suppression 3x3 functions + * + * @param[in] input_ptr Pointer to the input tensor. + * @param[out] output_ptr Pointer to the output tensor + * @param[in] input_stride Stride of the input tensor + */ + using NonMaxSuppr3x3Function = void(const void *__restrict input_ptr, void *__restrict output_ptr, const uint32_t input_stride); + + NonMaxSuppr3x3Function *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */ + const ITensor *_input; /**< Source tensor */ + ITensor *_output; /**< Destination tensor */ +}; + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32 + */ +class NENonMaximaSuppression3x3FP16Kernel : public NENonMaximaSuppression3x3Kernel +{ +public: + const char *name() const override + { + return "NENonMaximaSuppression3x3FP16Kernel"; + } + /** Initialise the kernel's sources, destinations and border mode. + * + * @param[in] input Source tensor. Data types supported: U8/F32. + * @param[out] output Destination tensor. Data types supported: same as @p input + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); +}; +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32 */ +using NENonMaximaSuppression3x3FP16Kernel = NENonMaximaSuppression3x3Kernel; +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +} // namespace arm_compute +#endif /* ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H */ diff --git a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp index 1b72a3e277..27464d5b42 100644 --- a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h" +#include "src/core/NEON/kernels/NENormalizationLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NENormalizationLayerKernel.h b/src/core/NEON/kernels/NENormalizationLayerKernel.h new file mode 100644 index 0000000000..53a06b9ed9 --- /dev/null +++ b/src/core/NEON/kernels/NENormalizationLayerKernel.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H +#define ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the normalization layer kernel. + */ +class NENormalizationLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NENormalizationLayerKernel"; + } + /** Default constructor */ + NENormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENormalizationLayerKernel(const NENormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENormalizationLayerKernel &operator=(const NENormalizationLayerKernel &) = delete; + /** Default Move Constructor. 
*/ + NENormalizationLayerKernel(NENormalizationLayerKernel &&) = default; + /** Default move assignment operator */ + NENormalizationLayerKernel &operator=(NENormalizationLayerKernel &&) = default; + /** Default destructor */ + ~NENormalizationLayerKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: FP16/F32. Data layouts supported: NCHW/NHWC. + * @param[in] input_squared Source with each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM], + * Data type and layout supported: same as @p input. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type and layout supported: same as @p input. + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. + */ + void configure(const ITensor *input, const ITensor *input_squared, ITensor *output, NormalizationLayerInfo norm_info); + /** Static function to check if given info will lead to a valid configuration of @ref NENormalizationLayerKernel + * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: FP16/F32. Data layouts supported: NCHW/NHWC. + * @param[in] input_squared Source with each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM], + * Data type and layout supported: same as @p input. + * @param[in] output Destination tensor. Output will have the same number of dimensions as input. Data type and layout supported: same as @p input. + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *input_squared, const ITensorInfo *output, NormalizationLayerInfo norm_info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Function to perform normalization depending on the given template + * dimension. The second template parameter specifies whether the + * normalization has to be 1D or 2D. + * + * @note Only supported normalizations are: + * - 1D over X or Z + * - 2D over X and Y + * + * @param[in] window Region on which to execute the kernel. + */ + template <typename T, unsigned int S, unsigned int dim, bool do_2D_norm> + void normalize_float(const Window &window); + + /** Common signature for all the specialised normalization functions + * + * @param[in] window Region on which to execute the kernel. + */ + using NormalizationFunction = void (NENormalizationLayerKernel::*)(const Window &window); + +private: + NormalizationFunction _func; + const ITensor *_input; + const ITensor *_input_squared; + ITensor *_output; + NormalizationLayerInfo _norm_info; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEPadLayerKernel.cpp b/src/core/NEON/kernels/NEPadLayerKernel.cpp index ca9c5419e0..200fe2ce54 100644 --- a/src/core/NEON/kernels/NEPadLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPadLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h" +#include "src/core/NEON/kernels/NEPadLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEPadLayerKernel.h b/src/core/NEON/kernels/NEPadLayerKernel.h new file mode 100644 index 0000000000..ec4bdffdcd --- /dev/null +++ b/src/core/NEON/kernels/NEPadLayerKernel.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEPADLAYERKERNEL_H +#define ARM_COMPUTE_NEPADLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to add padding to a tensor + * + * Add padding given padding information + */ +class NEPadLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEPadLayerKernel"; + } + /** Default constructor */ + NEPadLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPadLayerKernel(const NEPadLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPadLayerKernel &operator=(const NEPadLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEPadLayerKernel(NEPadLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEPadLayerKernel &operator=(NEPadLayerKernel &&) = default; + /** Default destructor */ + ~NEPadLayerKernel() = default; + + /** Initialize the function + * + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: same as @p input + * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] + * specifies the front and the end padding in the i-th dimension. + * @param[in] constant_value (Optional) Constant value to be used for the padding + * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT. + * Only CONSTANT padding mode is currently supported + */ + void configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT); + /** Static function to check if given info will lead to a valid configuration of @ref NEPadLayer. + * + * @param[in] input Source tensor info. Data types supported: All. 
+ * @param[in] output Output tensor info. Data type supported: same as @p input + * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] + * specifies the front and the end padding in the i-th dimension. + * @param[in] constant_value (Optional) Constant value to be used for the padding + * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT. + * Only CONSTANT padding mode is currently supported + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Template function to run the padding function with constant padding + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template <typename T> + void run_pad_constant(const Window &window); + + /** Function to run the padding function with constant padding for 3D input and 1D, 2D, 3D padding + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + void run_pad_constant_uint8_3Dinput_3Dpad(const Window &window); + + /** Common signature for all the specialised permute functions + * + * @param[in] window Region on which to execute the kernel. 
+ */ + using PadFunctionPtr = void (NEPadLayerKernel::*)(const Window &window); + + PadFunctionPtr _func; + const ITensor *_input; + ITensor *_output; + PaddingList _padding; + PixelValue _constant_value; + PaddingMode _mode; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEPADLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEPermuteKernel.cpp b/src/core/NEON/kernels/NEPermuteKernel.cpp index eab11ebfff..6a9f5d36ef 100644 --- a/src/core/NEON/kernels/NEPermuteKernel.cpp +++ b/src/core/NEON/kernels/NEPermuteKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEPermuteKernel.h" +#include "src/core/NEON/kernels/NEPermuteKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" @@ -123,7 +123,7 @@ void NEPermuteKernel::run_permute(const Window &window) // Input window Window window_in = window; - // we only support these two configs in arm_compute/core/NEON/kernels/convolution/common/shims.hpp, for all others + // we only support these two configs in src/core/NEON/kernels/convolution/common/shims.hpp, for all others // we have to fall back to C++ if((input_layout == DataLayout::NCHW && _perm == PermutationVector{ 2U, 0U, 1U }) || (input_layout == DataLayout::NHWC && _perm == PermutationVector{ 1U, 2U, 0U })) { diff --git a/src/core/NEON/kernels/NEPermuteKernel.h b/src/core/NEON/kernels/NEPermuteKernel.h new file mode 100644 index 0000000000..80187de9eb --- /dev/null +++ b/src/core/NEON/kernels/NEPermuteKernel.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEPERMUTEKERNEL_H +#define ARM_COMPUTE_NEPERMUTEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** NEON kernel to perform tensor permutation. 
+ * + * Permutes given a permutation vector + */ +class NEPermuteKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEPermuteKernel"; + } + /** Default constructor */ + NEPermuteKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPermuteKernel(const NEPermuteKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPermuteKernel &operator=(const NEPermuteKernel &) = delete; + /** Allow instances of this class to be moved */ + NEPermuteKernel(NEPermuteKernel &&) = default; + /** Allow instances of this class to be moved */ + NEPermuteKernel &operator=(NEPermuteKernel &&) = default; + /** Default destructor */ + ~NEPermuteKernel() = default; + + /** Set the input and output of the kernel. + * + * @note Arbitrary permutation vectors are supported with rank not greater than 4 + * + * @param[in] input The input tensor to permute. Data types supported: All + * @param[out] output The output tensor. Data types supported: Same as @p input + * @param[in] perm Permutation vector + */ + void configure(const ITensor *input, ITensor *output, const PermutationVector &perm); + /** Static function to check if given info will lead to a valid configuration of @ref NEPermuteKernel + * + * @note Arbitrary permutation vectors are supported with rank not greater than 4 + * + * @param[in] input The input tensor to permute. Data types supported: All + * @param[in] output The output tensor. Data types supported: Same as @p input + * @param[in] perm Permutation vector + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Template function to run the permute + * + * @param[in] window Region on which to execute the kernel. 
(Must be a valid region of the window returned by window()). + */ + template <typename T> + void run_permute(const Window &window); + + /** Common signature for all the specialised permute functions + * + * @param[in] window Region on which to execute the kernel. + */ + using PermuteFunctionPtr = void (NEPermuteKernel::*)(const Window &window); + + PermuteFunctionPtr _func; + const ITensor *_input; + ITensor *_output; + PermutationVector _perm; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEPERMUTEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp index 0847cb1f23..8d17651f37 100644 --- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp +++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" +#include "src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" #include "arm_compute/core/TensorInfo.h" #include "src/core/CPP/Validate.h" diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h new file mode 100644 index 0000000000..d414168b2d --- /dev/null +++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H +#define ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform addition between two tensors */ +class NEPixelWiseMultiplicationKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEPixelWiseMultiplicationKernel"; + } + /** Default constructor */ + NEPixelWiseMultiplicationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPixelWiseMultiplicationKernel(const NEPixelWiseMultiplicationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPixelWiseMultiplicationKernel &operator=(const NEPixelWiseMultiplicationKernel &) = delete; + /** Allow instances of this class to be moved */ + NEPixelWiseMultiplicationKernel(NEPixelWiseMultiplicationKernel &&) = default; + /** Allow instances of this class to be moved */ + NEPixelWiseMultiplicationKernel &operator=(NEPixelWiseMultiplicationKernel &&) = default; + /** Default destructor */ + ~NEPixelWiseMultiplicationKernel() = default; + /** Initialise the kernel's input, output and border mode. + * + * Valid configurations (Input1,Input2) -> Output : + * + * Support: Broadcast? Scale=1/255? + * - (U8,U8) -> U8, S16 N Y + * - (U8,S16) -> S16 N Y + * - (S16,U8) -> S16 N Y + * - (S16,S16) -> S16 N Y + * - (S32,S32) -> S32 Y N + * - (F16,F16) -> F16 N Y + * - (F32,F32) -> F32 Y Y + * - (QASYMM8,QASYMM8) -> QASYMM8 Y Y + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED Y Y + * - (QSYMM16,QSYMM16) -> QSYMM16, S32 N Y + * + * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported. + * For all other scale values only round to zero (implemented as round towards minus infinity) is supported. 
+ * + * @param[in] input1 First input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 + * @param[in] input2 Second input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 + * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 + * @param[in] scale Scale to apply after multiplication. + * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. + * If both @p input1, @p input2 and @p output are of datatype S32, scale cannot be 1/255 + * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if any of the inputs is of quantized datatype + * @param[in] rounding_policy Rounding policy. + */ + void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); + /** Static function to check if given info will lead to a valid configuration of @ref NEPixelWiseMultiplicationKernel + * + * Valid configurations (Input1,Input2) -> Output : + * Support: Broadcast? Scale=1/255? + * - (U8,U8) -> U8, S16 N Y + * - (U8,S16) -> S16 N Y + * - (S16,U8) -> S16 N Y + * - (S16,S16) -> S16 N Y + * - (S32,S32) -> S32 Y N + * - (F16,F16) -> F16 N Y + * - (F32,F32) -> F32 Y Y + * - (QASYMM8,QASYMM8) -> QASYMM8 Y Y + * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED Y Y + * - (QSYMM16,QSYMM16) -> QSYMM16, S32 N Y + * + * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported. + * For all other scale values only round to zero (implemented as round towards minus infinity) is supported. + * + * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 + * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 + * @param[in] output Output tensor info. 
Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 + * @param[in] scale Scale to apply after multiplication. + * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. + * If both @p input1, @p input2 and @p output are of datatype S32, scale cannot be 1/255 + * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if any of the inputs is of quantized datatype + * @param[in] rounding_policy Rounding policy. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); + + // Inherited methods overridden + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialised multiplication functions with integer scaling factor + * + * @param[in] in1 Input1 tensor object. + * @param[in] in2 Input2 tensor object. + * @param[out] out Output tensor object. + * @param[in] window Region on which to execute the kernel + * @param[in] scale Integer scale factor. + */ + using MulFunctionInt = void(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, int scale); + /** Common signature for all the specialised multiplication functions with float scaling factor + * + * @param[in] in1 Input1 tensor object. + * @param[in] in2 Input2 tensor object. + * @param[out] out Output tensor object. + * @param[in] window Region on which to execute the kernel + * @param[in] scale Float scale factor. + */ + using MulFunctionFloat = void(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, float scale); + /** Common signature for all the specialised QASYMM8 multiplication functions with float scaling factor + * + * @param[in] in1 Input1 tensor object. + * @param[in] in2 Input2 tensor object. + * @param[out] out Output tensor object. 
+ * @param[in] window Region on which to execute the kernel + * @param[in] scale Float scale factor. + * + */ + using MulFunctionQuantized = void(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, float scale); + + MulFunctionFloat *_func_float; + MulFunctionInt *_func_int; + MulFunctionQuantized *_func_quantized; + +private: + float _scale; + int _scale_exponent; +}; + +/** Interface for the complex pixelwise multiplication kernel. */ +class NEComplexPixelWiseMultiplicationKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEComplexPixelWiseMultiplicationKernel"; + } + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input1 An input tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor). + * @param[in] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * @param[out] output The output tensor, Data types supported: same as @p input1. Number of channels supported: same as @p input1. + */ + void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEComplexPixelWiseMultiplicationKernel + * + * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor). + * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; +}; + +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp index f9636dcb8d..0f0b9eed5a 100644 --- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h" +#include "src/core/NEON/kernels/NEPoolingLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.h b/src/core/NEON/kernels/NEPoolingLayerKernel.h new file mode 100644 index 0000000000..aa3d2f3f01 --- /dev/null +++ b/src/core/NEON/kernels/NEPoolingLayerKernel.h @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H +#define ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the pooling layer kernel */ +class NEPoolingLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEPoolingLayerKernel"; + } + /** Default constructor */ + NEPoolingLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPoolingLayerKernel(const NEPoolingLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPoolingLayerKernel &operator=(const NEPoolingLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEPoolingLayerKernel(NEPoolingLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEPoolingLayerKernel &operator=(NEPoolingLayerKernel &&) = default; + /** Default destructor */ + ~NEPoolingLayerKernel() = default; + /** Set the input and output tensors. + * + * @note F16 are supported for pool sizes 2 and 3 only + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + * @param[out] indices (optional) The indices of the maximal values. 
Data type supported: U32. + */ + void configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info, ITensor *indices = nullptr); + /** Static function to check if given info will lead to a valid configuration of @ref NEPoolingLayerKernel + * + * @note F16 are supported for pool sizes 2 and 3 only + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + /** Function to perform 2x2 pooling. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + void pooling2_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Function to perform 2x2 pooling and compute the pooling indices. The indices can be used for max unpool. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + void pooling2_f32_nhwc_maxpool_indices(const Window &window_input, const Window &window); + /** Function to perform MxN pooling for 32-bit floating point values. 
+ * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + void poolingMxN_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Function to perform MxN pooling for 32-bit floating point values (NHWC). + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + void poolingMxN_f32_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Function to perform 7x7 pooling. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + void pooling7_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Function to perform 3x3 pooling. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + void pooling3_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Function to perform 2x2 pooling for float16_t. 
+ * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + void pooling2_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Function to perform 2x2 pooling and compute the pooling indices for FP32/FP16. The indices can be used for max unpool. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + template <typename T> + void pooling2_nchw_maxpool_indices(const Window &window_input, const Window &window); + /** Function to perform 2x2 pooling and compute the pooling indices. The indices can be used for max unpool. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + void pooling2_f16_nhwc_maxpool_indices(const Window &window_input, const Window &window); + /** Function to perform 3x3 pooling. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + void pooling3_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Function to perform MxN pooling for 16-bit floating point values. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. 
+ */ + void poolingMxN_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Function to perform MxN pooling for 16-bit floating point values. (NHWC) + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + void poolingMxN_f16_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Template function to perform 2x2 pooling for 8bit quantized fixed point. (NCHW) + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + template <typename T> + void pooling2_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Template function to perform 3x3 pooling for 8bit quantized fixed point. (NCHW) + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + template <typename T> + void pooling3_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Template function to perform MxN pooling for 8-bit quantized. (NCHW) + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. 
+ * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + template <typename T> + void poolingMxN_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Template function to perform MxN pooling for 8-bit quantized. (NHWC) + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + template <typename T> + void poolingMxN_q8_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); + /** Common signature for all the specialised Pooling functions + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + * @param[in] pooling_type Pooling operation to be computed. + * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. + */ + using PoolingFunction = void (NEPoolingLayerKernel::*)(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding); + +private: + PoolingFunction _func; + const ITensor *_input; + ITensor *_output; + ITensor *_indices; + PoolingLayerInfo _pool_info; + DataLayout _data_layout; + unsigned int _num_elems_processed_per_iteration; + BorderSize _border_size; + bool _is_square; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp b/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp index 06a1f14e5f..6757affae8 100644 --- a/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h" +#include "src/core/NEON/kernels/NEPriorBoxLayerKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEPriorBoxLayerKernel.h b/src/core/NEON/kernels/NEPriorBoxLayerKernel.h new file mode 100644 index 0000000000..430a47f9f8 --- /dev/null +++ b/src/core/NEON/kernels/NEPriorBoxLayerKernel.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H +#define ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to calculate prior boxes */ +class NEPriorBoxLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEPriorBoxLayerKernel"; + } + /** Default constructor */ + NEPriorBoxLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPriorBoxLayerKernel(const NEPriorBoxLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPriorBoxLayerKernel &operator=(const NEPriorBoxLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEPriorBoxLayerKernel(NEPriorBoxLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEPriorBoxLayerKernel &operator=(NEPriorBoxLayerKernel &&) = default; + /** Default destructor */ + ~NEPriorBoxLayerKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC. + * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1 + * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input + * @param[in] info Prior box layer info. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output, const PriorBoxLayerInfo &info); + /** Static function to check if given info will lead to a valid configuration of @ref NEPriorBoxLayerKernel + * + * @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC. + * @param[in] input2 Second source tensor info. Data types and layouts supported: same as @p input1 + * @param[in] output Destination tensor info. 
Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input + * @param[in] info Prior box layer info. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Stores the coordinates of the calculated prior boxes. + * + * @param[out] out Output pointer. + * @param[in] offset Output offset to write to. + * @param[in] center_x Center pixel value on x-axis. + * @param[in] center_y Center pixel value on y-axis. + * @param[in] box_width Prior box width. + * @param[in] box_height Prior box height. + * @param[in] width Input width. + * @param[in] height Input height. + */ + void store_coordinates(float *out, const int offset, const float center_x, const float center_y, const float box_width, const float box_height, const int width, const int height); + /** Function to calculate prior boxes. + * + * @param[in] window Input region on which to execute the kernel. + */ + void calculate_prior_boxes(const Window &window); + + const ITensor *_input1; + const ITensor *_input2; + ITensor *_output; + PriorBoxLayerInfo _info; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp b/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp index 55585b4e00..8c1c8cf56b 100644 --- a/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h" +#include "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" diff --git a/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h b/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h new file mode 100644 index 0000000000..ba68171a59 --- /dev/null +++ b/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H +#define ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H + +#include "src/core/NEON/INEKernel.h" +#include <functional> + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform layer normalization */ +class NEQLSTMLayerNormalizationKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEQLSTMLayerNormalizationKernel"; + } + /** Default constructor */ + NEQLSTMLayerNormalizationKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEQLSTMLayerNormalizationKernel(const NEQLSTMLayerNormalizationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEQLSTMLayerNormalizationKernel &operator=(const NEQLSTMLayerNormalizationKernel &) = delete; + /** Default Move Constructor. */ + NEQLSTMLayerNormalizationKernel(NEQLSTMLayerNormalizationKernel &&) = default; + /** Default move assignment operator */ + NEQLSTMLayerNormalizationKernel &operator=(NEQLSTMLayerNormalizationKernel &&) = default; + /** Default destructor */ + ~NEQLSTMLayerNormalizationKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QSYMM16. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] weight Weight tensor. Data types supported: Same as @p input. + * @param[in] bias Bias tensor. Data types supported: S32 + */ + void configure(const ITensor *input, ITensor *output, const ITensor *weight, const ITensor *bias); + /** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayerNormalizationKernel + * + * @param[in] input Source tensor info. Data types supported: QSYMM16. + * @param[in] output Destination tensor info. Data types supported: Same as @p input. + * @param[in] weight Weight tensor info. Data types supported: Same as @p input. 
+ * @param[in] bias Bias tensor info. Data types supported: S32 + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias); + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + // constants + static constexpr uint32_t max_input_dimension{ 2 }; /**< The maximum input dimension supported */ + static constexpr uint32_t max_weight_dimension{ 1 }; /**< The maximum weight dimension supported */ + static constexpr uint32_t max_bias_dimension{ 1 }; /**< The maximum bias dimension supported */ + static constexpr uint32_t vector_size_byte{ 16 }; /**< Computation vector size in byte */ + + using ComputeFuncType = std::function<void(NEQLSTMLayerNormalizationKernel &)>; + + ComputeFuncType _fn{}; /**< Function pointer to computation function */ + + const ITensor *_input + { + nullptr + }; /**< Input tensor */ + const ITensor *_weight + { + nullptr + }; /**< Weight tensor */ + const ITensor *_bias + { + nullptr + }; /**< Bias tensor */ + ITensor *_output{ nullptr }; /**< Output tensor */ + + int32_t _output_multiplier{}; /**< Multiplier for output values */ + int32_t _output_shift{}; /**< Shift value for output values */ + + int32_t _window_start_x{}; /**< The beginning of x-axis iteration */ + int32_t _window_end_x{}; /**< The end of x-axis iteration */ + int32_t _window_step_x{}; /**< The size of x-axis iteration's step */ + + Window _inout_window{}; /**< Window for input and output tensor */ + Window _weight_window{}; /**< Window for weight and bias tensor */ + + /** Function to configure initial windows for destination of computation + * + * @param[in] Target destination tensor to use for output window + * + * @return configured window + */ + Window configure_window(ITensor *target); + // Function to compute for data type QSYMM16 + void compute_qsymm16(); + /** Function to compute summation and summation of squared input of the given input pointer + * + 
 * @param[in] Input_ptr pointer to input array + * + */ + std::pair<int64_t, int64_t> sum_qsymm16(const int16_t *input_ptr); + /** Function to normalize values using computed mean and standard deviation + * + * @param[in] input_ptr Pointer to input array + * @param[in] output_ptr Pointer to output array + * @param[in] weight_ptr Pointer to weight array + * @param[in] bias_ptr Pointer to bias array + * @param[in] mean Mean value + * @param[in] inv_std_mul Quantized multiplier for standard deviation + * @param[in] inv_std_shift Shift for standard deviation + * + */ + void normalize_qasymm16(const int16_t *input_ptr, + int16_t *output_ptr, + const int16_t *weight_ptr, + const int32_t *bias_ptr, + int32_t mean, int32_t inv_std_mul, int32_t inv_std_shift); + /** Function to compute output quantization information */ + QuantizationInfo compute_output_qinfo(); +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp index 990e4b67bc..ff3d9fff96 100644 --- a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h" +#include "src/core/NEON/kernels/NEQuantizationLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEQuantizationLayerKernel.h b/src/core/NEON/kernels/NEQuantizationLayerKernel.h new file mode 100644 index 0000000000..5ee0ed4412 --- /dev/null +++ b/src/core/NEON/kernels/NEQuantizationLayerKernel.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H +#define ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the quantization layer kernel. 
+ * + * @note The implementation supports only 3D input tensors + * + */ +class NEQuantizationLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEQuantizationLayerKernel"; + } + /** Default constructor */ + NEQuantizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEQuantizationLayerKernel(const NEQuantizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEQuantizationLayerKernel &operator=(const NEQuantizationLayerKernel &) = delete; + /** Default Move Constructor. */ + NEQuantizationLayerKernel(NEQuantizationLayerKernel &&) = default; + /** Default move assignment operator */ + NEQuantizationLayerKernel &operator=(NEQuantizationLayerKernel &&) = default; + /** Default destructor */ + ~NEQuantizationLayerKernel() = default; + /** Set the input, output. + * + * @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. + * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. + * + * @note Output auto initialization is not supported by this kernel + */ + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayerKernel + * + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. + * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialised @ref NEQuantizationLayerKernel functions + * + * @param[in] window Region on which to execute the kernel. + */ + using QuantizationFunctionExecutorPtr = void (NEQuantizationLayerKernel::*)(const Window &window); + /** Function to apply QASYMM8 or QASYMM8_SIGNED quantization on a tensor. + * + * @param[in] window Region on which to execute the kernel. + */ + template <typename TIn, typename TOut> + void run_quantize_qasymm8(const Window &window); + /** Function to apply QASYMM16 quantization on a tensor. + * + * @param[in] window Region on which to execute the kernel. + */ + template <typename T> + void run_quantize_qasymm16(const Window &window); + + const ITensor *_input; + ITensor *_output; + + QuantizationFunctionExecutorPtr _func; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp b/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp index 79f7888eba..c48cda8b8e 100644 --- a/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp +++ b/src/core/NEON/kernels/NEROIAlignLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h" +#include "src/core/NEON/kernels/NEROIAlignLayerKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" diff --git a/src/core/NEON/kernels/NEROIAlignLayerKernel.h b/src/core/NEON/kernels/NEROIAlignLayerKernel.h new file mode 100644 index 0000000000..d909fb1758 --- /dev/null +++ b/src/core/NEON/kernels/NEROIAlignLayerKernel.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H +#define ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the RoIAlign kernel. + */ +class NEROIAlignLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEROIAlignLayerKernel"; + } + + /** Constructor */ + NEROIAlignLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEROIAlignLayerKernel(const NEROIAlignLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEROIAlignLayerKernel &operator=(const NEROIAlignLayerKernel &) = delete; + /** Default Move Constructor. 
*/ + NEROIAlignLayerKernel(NEROIAlignLayerKernel &&) = default; + /** Default move assignment operator. */ + NEROIAlignLayerKernel &operator=(NEROIAlignLayerKernel &&) = default; + /** Default destructor */ + ~NEROIAlignLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32. + * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner + * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. + * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8, otherwise same as @p input + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. + */ + void configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info); + /** Static function to check if given info will lead to a valid configuration of @ref NEROIAlignLayerKernel + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32. + * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8, + * otherwise same as @p input + * @param[in] output Destination tensor info. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. 
+ * + * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. + * + * @return a Status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + template + void internal_run(const Window &window, const ThreadInfo &info); + + const ITensor *_input; + ITensor *_output; + const ITensor *_rois; + ROIPoolingLayerInfo _pool_info; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H*/ diff --git a/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp index a3171d9aa6..40dae828a3 100644 --- a/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h" +#include "src/core/NEON/kernels/NEROIPoolingLayerKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" diff --git a/src/core/NEON/kernels/NEROIPoolingLayerKernel.h b/src/core/NEON/kernels/NEROIPoolingLayerKernel.h new file mode 100644 index 0000000000..36424172a6 --- /dev/null +++ b/src/core/NEON/kernels/NEROIPoolingLayerKernel.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H +#define ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +#include "arm_compute/core/IArray.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the ROI pooling layer kernel */ +class NEROIPoolingLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEROIPoolingLayerKernel"; + } + /** Default constructor */ + NEROIPoolingLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEROIPoolingLayerKernel(const NEROIPoolingLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEROIPoolingLayerKernel &operator=(const NEROIPoolingLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEROIPoolingLayerKernel(NEROIPoolingLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEROIPoolingLayerKernel &operator=(NEROIPoolingLayerKernel &&) = default; + /** Default destructor */ + ~NEROIPoolingLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F32. + * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner + * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16 + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as that specified by @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. 
+ * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois tensor. + */ + void configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + const ITensor *_rois; + ITensor *_output; + ROIPoolingLayerInfo _pool_info; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NERangeKernel.cpp b/src/core/NEON/kernels/NERangeKernel.cpp index 3466794b11..8d11122ab2 100644 --- a/src/core/NEON/kernels/NERangeKernel.cpp +++ b/src/core/NEON/kernels/NERangeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NERangeKernel.h" +#include "src/core/NEON/kernels/NERangeKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NERangeKernel.h b/src/core/NEON/kernels/NERangeKernel.h new file mode 100644 index 0000000000..7c42ef11dc --- /dev/null +++ b/src/core/NEON/kernels/NERangeKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NERANGEKERNEL_H +#define ARM_COMPUTE_NERANGEKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Kernel class for Range + * + * range generates a 1-D tensor containing a sequence of numbers that begins at 'start' and extends by increments + * of 'step' up to but not including 'end'. + */ +class NERangeKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NERangeKernel"; + } + /** Default constructor */ + NERangeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NERangeKernel(const NERangeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NERangeKernel &operator=(const NERangeKernel &) = delete; + /** Allow instances of this class to be moved */ + NERangeKernel(NERangeKernel &&) = default; + /** Allow instances of this class to be moved */ + NERangeKernel &operator=(NERangeKernel &&) = default; + /** Default destructor */ + ~NERangeKernel() = default; + /** Initialize the kernel's output tensor, start, end and step of the sequence. + * + * @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[in] start The starting value of the sequence. + * @param[in] end The ending (not including) value of the sequence. + * @param[in] step The gap between each pair of values in the sequence. 
+ */ + void configure(ITensor *output, float start, float end, float step); + /** Static function to check if given info will lead to a valid configuration of @ref NERangeKernel + * + * @param[in] output Output tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[in] start The starting value of the sequence. + * @param[in] end The ending (not including) value of the sequence. + * @param[in] step The gap between each pair of values in the sequence. + * + * @return a status + */ + static Status validate(const ITensorInfo *output, float start, float end, float step); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + using RangeFunction = void(ITensor *output, float start, float step, const Window &window); + + RangeFunction *_func; /**< Range function to be called */ + float _start; /**< Start of sequence */ + float _end; /**< End of sequence */ + float _step; /**< Increment/step value */ + ITensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NERANGEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp index 716b092396..4e63dd95aa 100644 --- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp +++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp @@ -21,18 +21,18 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h" +#include "src/core/NEON/kernels/NEReductionOperationKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/CPP/Validate.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/NEON/NEMath.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.h b/src/core/NEON/kernels/NEReductionOperationKernel.h new file mode 100644 index 0000000000..dfc105adae --- /dev/null +++ b/src/core/NEON/kernels/NEReductionOperationKernel.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
 IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H +#define ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a reduction operation + * + * @note For ARG_MIN/ARG_MAX reduction, the default data type for an uninitialized + * output tensor is signed 32-bit integer (S32). It is the user's responsibility + * to check that the results do not overflow because the indices are computed + * in unsigned 32-bit (U32). + */ +class NEReductionOperationKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEReductionOperationKernel"; + } + /** Default constructor */ + NEReductionOperationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReductionOperationKernel(const NEReductionOperationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReductionOperationKernel &operator=(const NEReductionOperationKernel &) = delete; + /** Allow instances of this class to be moved */ + NEReductionOperationKernel(NEReductionOperationKernel &&) = default; + /** Allow instances of this class to be moved */ + NEReductionOperationKernel &operator=(NEReductionOperationKernel &&) = default; + /** Default destructor */ + ~NEReductionOperationKernel() = default; + + /** Set the source, destination of the kernel + * + * @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. + * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input, S32 for ARG_MIN/ARG_MAX.
 + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 + * @param[in] op Reduction operation to perform. + */ + void configure(const ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op); + + /** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperationKernel. + * + * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. + * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input, S32 for ARG_MIN/ARG_MAX. + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 + * @param[in] op Reduction operation to perform. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; + unsigned int _reduction_axis; + ReductionOperation _op; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NERemapKernel.cpp b/src/core/NEON/kernels/NERemapKernel.cpp index f698439507..b334a11227 100644 --- a/src/core/NEON/kernels/NERemapKernel.cpp +++ b/src/core/NEON/kernels/NERemapKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NERemapKernel.h" +#include "src/core/NEON/kernels/NERemapKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NERemapKernel.h b/src/core/NEON/kernels/NERemapKernel.h new file mode 100644 index 0000000000..8fe1ba5855 --- /dev/null +++ b/src/core/NEON/kernels/NERemapKernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEREMAPKERNEL_H +#define ARM_COMPUTE_NEREMAPKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a remap on a tensor */ +class NERemapKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NERemapKernel"; + } + /** Default constructor */ + NERemapKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NERemapKernel(const NERemapKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NERemapKernel &operator=(const NERemapKernel &) = delete; + /** Allow instances of this class to be moved */ + NERemapKernel(NERemapKernel &&) = default; + /** Allow instances of this class to be moved */ + NERemapKernel &operator=(NERemapKernel &&) = default; + /** Default destructor */ + ~NERemapKernel() = default; + + /** Initialize the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[in] map_x Map for X coordinates. Data type supported: F32. + * @param[in] map_y Map for Y coordinates. Data type supported: F32. + * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. + * @param[in] policy The interpolation type. 
+ */ + void configure(const ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + /** function to perform nearest interpolation on the given window */ + void remap_nearest(const Window &window); + /** function to perform bilinear interpolation on the given window */ + void remap_bilinear(const Window &window); + /** Remap function to use for the particular interpolation type passed to configure() */ + void (NERemapKernel::*_func)(const Window &window); + + const ITensor *_input; /**< Input image */ + ITensor *_output; /**< Output image */ + const ITensor *_map_x; /**< Input remap x coordinates */ + const ITensor *_map_y; /**< Input remap y coordinates */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEREMAPKERNEL_H */ diff --git a/src/core/NEON/kernels/NEReorgLayerKernel.cpp b/src/core/NEON/kernels/NEReorgLayerKernel.cpp index 1c48a5c93d..0dcb439665 100644 --- a/src/core/NEON/kernels/NEReorgLayerKernel.cpp +++ b/src/core/NEON/kernels/NEReorgLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEReorgLayerKernel.h" +#include "src/core/NEON/kernels/NEReorgLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEReorgLayerKernel.h b/src/core/NEON/kernels/NEReorgLayerKernel.h new file mode 100644 index 0000000000..eac91154a1 --- /dev/null +++ b/src/core/NEON/kernels/NEReorgLayerKernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEREORGLAYERKERNEL_H +#define ARM_COMPUTE_NEREORGLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the kernel to perform tensor re-organization */ +class NEReorgLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEReorgLayerKernel"; + } + /** Default constructor */ + NEReorgLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReorgLayerKernel(const NEReorgLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReorgLayerKernel &operator=(const NEReorgLayerKernel &) = delete; + /** Default Move Constructor. 
 */ + NEReorgLayerKernel(NEReorgLayerKernel &&) = default; + /** Default move assignment operator */ + NEReorgLayerKernel &operator=(NEReorgLayerKernel &&) = default; + /** Default destructor */ + ~NEReorgLayerKernel() = default; + /** Set the input and output of the kernel + * + * @param[in] input Source tensor. Data type supported: All + * @param[out] output Destination tensor. Data type supported: Same as @p input + * @param[in] stride Stride to be used during data re-organization. + * It defines the spatial distance between 2 consecutive pixels in the x and y direction + */ + void configure(const ITensor *input, ITensor *output, int32_t stride); + + /** Static function to check if given info will lead to a valid configuration of @ref NEReorgLayerKernel + * + * @param[in] input Source tensor info. Data type supported: All + * @param[in] output Destination tensor info. Data type supported: Same as @p input + * @param[in] stride Stride to be used during data re-organization + * It defines the spatial distance between 2 consecutive pixels in the x and y direction + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t stride); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; + int32_t _stride; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEREORGLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp index 7946812811..462404f996 100644 --- a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp +++ b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp @@ -21,18 +21,18 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" +#include "src/core/NEON/kernels/NEReshapeLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "src/core/AccessWindowStatic.h" #include "src/core/CPP/Validate.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" diff --git a/src/core/NEON/kernels/NEReshapeLayerKernel.h b/src/core/NEON/kernels/NEReshapeLayerKernel.h new file mode 100644 index 0000000000..ecec8d9f1f --- /dev/null +++ b/src/core/NEON/kernels/NEReshapeLayerKernel.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NERESHAPELAYERKERNEL_H +#define ARM_COMPUTE_NERESHAPELAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the kernel to perform tensor reshaping */ +class NEReshapeLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEReshapeLayerKernel"; + } + /** Default constructor */ + NEReshapeLayerKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReshapeLayerKernel(const NEReshapeLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReshapeLayerKernel &operator=(const NEReshapeLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEReshapeLayerKernel(NEReshapeLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEReshapeLayerKernel &operator=(NEReshapeLayerKernel &&) = default; + /** Default destructor */ + ~NEReshapeLayerKernel() = default; + /** Set the input and output info of the kernel + * + * @param[in] input Source tensor info. Data type supported: All + * @param[out] output Destination tensor info. Data type supported: Same as @p input + */ + void configure(const ITensorInfo *input, ITensorInfo *output); + + /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayerKernel + * + * @param[in] input Source tensor info. Data type supported: All + * @param[in] output Destination tensor info. 
Data type supported: Same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NERESHAPELAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEReverseKernel.cpp b/src/core/NEON/kernels/NEReverseKernel.cpp index 2c081cb917..21c758053a 100644 --- a/src/core/NEON/kernels/NEReverseKernel.cpp +++ b/src/core/NEON/kernels/NEReverseKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEReverseKernel.h" +#include "src/core/NEON/kernels/NEReverseKernel.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" diff --git a/src/core/NEON/kernels/NEReverseKernel.h b/src/core/NEON/kernels/NEReverseKernel.h new file mode 100644 index 0000000000..07b547a327 --- /dev/null +++ b/src/core/NEON/kernels/NEReverseKernel.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEREVERSEKERNEL_H +#define ARM_COMPUTE_NEREVERSEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the reverse layer kernel. */ +class NEReverseKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEReverseKernel"; + } + /** Default constructor */ + NEReverseKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReverseKernel(const NEReverseKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReverseKernel &operator=(const NEReverseKernel &) = delete; + /** Allow instances of this class to be moved */ + NEReverseKernel(NEReverseKernel &&) = default; + /** Allow instances of this class to be moved */ + NEReverseKernel &operator=(NEReverseKernel &&) = default; + /** Default destructor */ + ~NEReverseKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor. Data type supported: Same as @p input + * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 + */ + void configure(const ITensor *input, ITensor *output, const ITensor *axis); + + /** Static function to check if given info will lead to a valid configuration of @ref NEReverseKernel + * + * @param[in] input Input tensor info. 
Data types supported: All + * @param[in] output Output tensor info. Data type supported: Same as @p input + * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32 + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; + const ITensor *_axis; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEREVERSEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp index 2e40759050..5a6d49bf07 100644 --- a/src/core/NEON/kernels/NEScaleKernel.cpp +++ b/src/core/NEON/kernels/NEScaleKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEScaleKernel.h" +#include "src/core/NEON/kernels/NEScaleKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Window.h" diff --git a/src/core/NEON/kernels/NEScaleKernel.h b/src/core/NEON/kernels/NEScaleKernel.h new file mode 100644 index 0000000000..a3786db5b7 --- /dev/null +++ b/src/core/NEON/kernels/NEScaleKernel.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NESCALEKERNEL_H +#define ARM_COMPUTE_NESCALEKERNEL_H + +#include "arm_compute/core/KernelDescriptors.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform scaling on a tensor */ +class NEScaleKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEScaleKernel"; + } + /** Default constructor */ + NEScaleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEScaleKernel(const NEScaleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEScaleKernel &operator=(const NEScaleKernel &) = delete; + /** Allow instances of this class to be moved */ + NEScaleKernel(NEScaleKernel &&) = default; + /** Allow instances of this class to be moved */ + NEScaleKernel &operator=(NEScaleKernel &&) = default; + /** Default destructor */ + ~NEScaleKernel() = default; + + /** Initialise the kernel's inputs, output and interpolation policy + * + * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor + * @note Using @p policy Area only supports data layout NCHW and input data type U8. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. + * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32 + * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32 + * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. 
+ * @param[in] info @ref ScaleKernelInfo to use for configuration + */ + void configure(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets, ITensor *output, + const ScaleKernelInfo &info); + /** Static function to check if given info will lead to a valid configuration of @ref NEScaleKernel + * + * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor + * @note Using @p policy Area only supports data layout NCHW and input data type U8. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. + * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32 + * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32 + * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32. + * @param[in] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] info @ref ScaleKernelInfo to use for validation + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *dx, const ITensorInfo *dy, const ITensorInfo *offsets, ITensorInfo *output, + const ScaleKernelInfo &info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** function to perform scale using area interpolation on the given window + * + * @note Used only in case down-sampling. 
+ */ + void scale_area_nchw_u8(const Window &window); + + /** function to perform scale using bilinear interpolation on the given window */ + template + void scale_bilinear_nchw(const Window &window); + /** function to perform scale using bilinear interpolation on the given window */ + template + void scale_bilinear_nhwc(const Window &window); + /** function to perform scale using bilinear interpolation on the given window */ + template + void scale_bilinear_qasymm(const Window &window); + + /** function to perform scale using nearest neighbour on the given window */ + template + void scale_nearest_nchw(const Window &window); + /** function to perform scale using nearest neighbour on the given window */ + template + void scale_nearest_nhwc(const Window &window); + + /** Scale function to use for the particular function to use */ + using ScaleFunctionPtr = void (NEScaleKernel::*)(const Window &window); + + ScaleFunctionPtr _func; + const ITensor *_offsets; + const ITensor *_dx; + const ITensor *_dy; + const ITensor *_input; + ITensor *_output; + InterpolationPolicy _policy; + BorderMode _border_mode; + PixelValue _constant_border_value; + float _sampling_offset; + bool _align_corners; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NESCALEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEScharr3x3Kernel.cpp b/src/core/NEON/kernels/NEScharr3x3Kernel.cpp index eb1dc65c0f..58b8caa2b6 100644 --- a/src/core/NEON/kernels/NEScharr3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEScharr3x3Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h" +#include "src/core/NEON/kernels/NEScharr3x3Kernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NEScharr3x3Kernel.h b/src/core/NEON/kernels/NEScharr3x3Kernel.h new file mode 100644 index 0000000000..920410ebb3 --- /dev/null +++ b/src/core/NEON/kernels/NEScharr3x3Kernel.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NESCHARR3x3KERNEL_H +#define ARM_COMPUTE_NESCHARR3x3KERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to run a 3x3 Scharr filter on a tensor. 
+ * +* @f[ +* \mathbf{G}_x=\begin{vmatrix} +* -3 & 0 & +3\\ +* -10& 0 & +10\\ +* -3 & 0 & +3 +* \end{vmatrix} +* @f] +*/ +class NEScharr3x3Kernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEScharr3x3Kernel"; + } + /** Default constructor */ + NEScharr3x3Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEScharr3x3Kernel(const NEScharr3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEScharr3x3Kernel &operator=(const NEScharr3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + NEScharr3x3Kernel(NEScharr3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + NEScharr3x3Kernel &operator=(NEScharr3x3Kernel &&) = default; + /** Default destructor */ + ~NEScharr3x3Kernel() = default; + + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + bool _run_scharr_x; /**< Do we need to run Scharr X ? */ + bool _run_scharr_y; /**< Do we need to run Scharr Y ? 
*/ + const ITensor *_input; /**< Input tensor */ + ITensor *_output_x; /**< Output tensor for scharr X */ + ITensor *_output_y; /**< Output tensor for scharr Y */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NESCHARR3x3KERNEL_H */ diff --git a/src/core/NEON/kernels/NESelectKernel.cpp b/src/core/NEON/kernels/NESelectKernel.cpp index 2f36db2ddb..9cf9b98a0c 100644 --- a/src/core/NEON/kernels/NESelectKernel.cpp +++ b/src/core/NEON/kernels/NESelectKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NESelectKernel.h" +#include "src/core/NEON/kernels/NESelectKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NESelectKernel.h b/src/core/NEON/kernels/NESelectKernel.h new file mode 100644 index 0000000000..f7142feff8 --- /dev/null +++ b/src/core/NEON/kernels/NESelectKernel.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NESELECTKERNEL_H
+#define ARM_COMPUTE_NESELECTKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Interface for the select kernel
+ *
+ * Select is computed by:
+ * @f[ output(i) = condition(i) ? x(i) : y(i) @f]
+ *
+ */
+class NESelectKernel : public INEKernel
+{
+public:
+    const char *name() const override
+    {
+        return "NESelectKernel";
+    }
+    /** Default constructor */
+    NESelectKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NESelectKernel(const NESelectKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NESelectKernel &operator=(const NESelectKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NESelectKernel(NESelectKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NESelectKernel &operator=(NESelectKernel &&) = default;
+    /** Default destructor */
+    ~NESelectKernel() = default;
+
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  c      Condition input tensor. Data types supported: U8.
+     * @param[in]  x      First input tensor. Data types supported: All.
+     * @param[in]  y      Second input tensor. Data types supported: Same as @p x
+     * @param[out] output Output tensor. Data types supported: Same as @p x
+     */
+    void configure(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output);
+
+    /** Validate the argument passed to the kernel
+     *
+     * @param[in] c      Condition input tensor. Data types supported: U8.
+     * @param[in] x      First input tensor. 
Data types supported: All. + * @param[in] y Second input tensor. Data types supported: Same as @p x + * @param[in] output Output tensor. Data types supported: Same as @p x. + * + * @return a status + */ + static Status validate(const ITensorInfo *c, const ITensorInfo *x, const ITensorInfo *y, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialised select functions + * + * @param[in] c Condition input tensor. Data types supported: U8. + * @param[in] x First input tensor. Data types supported: All. + * @param[in] y Second input tensor. Data types supported: Same as @p x + * @param[in] output Output tensor. Data types supported: Same as @p x. + */ + using SelectFunction = void(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window); + + /** Select function to use for the particular tensor types passed to configure() */ + SelectFunction *_function; + const ITensor *_c; /**< Condition tensor */ + const ITensor *_x; /**< Source tensor 1 */ + const ITensor *_y; /**< Source tensor 2 */ + ITensor *_output; /**< Destination tensor */ + bool _has_same_rank; /**< Flag that indicates if condition tensor and other inputs have the same rank */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NESELECTKERNEL_H */ diff --git a/src/core/NEON/kernels/NESobel3x3Kernel.cpp b/src/core/NEON/kernels/NESobel3x3Kernel.cpp index 1c7089b641..ecf6b59c29 100644 --- a/src/core/NEON/kernels/NESobel3x3Kernel.cpp +++ b/src/core/NEON/kernels/NESobel3x3Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NESobel3x3Kernel.h" +#include "src/core/NEON/kernels/NESobel3x3Kernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NESobel3x3Kernel.h b/src/core/NEON/kernels/NESobel3x3Kernel.h new file mode 100644 index 0000000000..2c3eaf5eb7 --- /dev/null +++ b/src/core/NEON/kernels/NESobel3x3Kernel.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NESOBEL3x3KERNEL_H +#define ARM_COMPUTE_NESOBEL3x3KERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to run a 3x3 Sobel X filter on a tensor. 
+ * + * @f[ + * \mathbf{G}_x=\begin{vmatrix} + * -1 & 0 & +1\\ + * -2 & 0 & +2\\ + * -1 & 0 & +1 + * \end{vmatrix} + * @f] +*/ +class NESobel3x3Kernel : public INEKernel +{ +public: + const char *name() const override + { + return "NESobel3x3Kernel"; + } + /** Default constructor */ + NESobel3x3Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel3x3Kernel(const NESobel3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel3x3Kernel &operator=(const NESobel3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + NESobel3x3Kernel(NESobel3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + NESobel3x3Kernel &operator=(NESobel3x3Kernel &&) = default; + /** Default destructor */ + ~NESobel3x3Kernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + bool _run_sobel_x; /**< Do we need to run Sobel X ? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y ? 
*/ + const ITensor *_input; /**< Input tensor */ + ITensor *_output_x; /**< Output tensor for sobel X */ + ITensor *_output_y; /**< Output tensor for sobel Y */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NESOBEL3x3KERNEL_H */ diff --git a/src/core/NEON/kernels/NESobel5x5Kernel.cpp b/src/core/NEON/kernels/NESobel5x5Kernel.cpp index 2421ea72ad..5a66b1f364 100644 --- a/src/core/NEON/kernels/NESobel5x5Kernel.cpp +++ b/src/core/NEON/kernels/NESobel5x5Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NESobel5x5Kernel.h" +#include "src/core/NEON/kernels/NESobel5x5Kernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NESobel5x5Kernel.h b/src/core/NEON/kernels/NESobel5x5Kernel.h new file mode 100644 index 0000000000..bd5eb29296 --- /dev/null +++ b/src/core/NEON/kernels/NESobel5x5Kernel.h @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NESOBEL5x5KERNEL_H +#define ARM_COMPUTE_NESOBEL5x5KERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor. + * + */ +class NESobel5x5HorKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NESobel5x5HorKernel"; + } + /** Default constructor */ + NESobel5x5HorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel5x5HorKernel(const NESobel5x5HorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel5x5HorKernel &operator=(const NESobel5x5HorKernel &) = delete; + /** Allow instances of this class to be moved */ + NESobel5x5HorKernel(NESobel5x5HorKernel &&) = default; + /** Allow instances of this class to be moved */ + NESobel5x5HorKernel &operator=(NESobel5x5HorKernel &&) = default; + /** Default destructor */ + ~NESobel5x5HorKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @note At least one of output_x or output_y must be set + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + const ITensor *_input; /**< Input tensor */ + ITensor *_output_x; /**< X output of horizontal pass */ + ITensor *_output_y; /**< Y output of horizontal pass */ + bool _run_sobel_x; /**< Do we need to run Sobel X? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y? */ + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel to run the vertical pass of 5x5 Sobel Y filter on a tensor. + * +*/ +class NESobel5x5VertKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NESobel5x5VertKernel"; + } + /** Default constructor */ + NESobel5x5VertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel5x5VertKernel(const NESobel5x5VertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel5x5VertKernel &operator=(const NESobel5x5VertKernel &) = delete; + /** Allow instances of this class to be moved */ + NESobel5x5VertKernel(NESobel5x5VertKernel &&) = default; + /** Allow instances of this class to be moved */ + NESobel5x5VertKernel &operator=(NESobel5x5VertKernel &&) = default; + /** Default destructor */ + ~NESobel5x5VertKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input_x Input for X (X output of hor pass). Data type supported: S16. + * @param[in] input_y Input for Y (Y output of hor pass). Data type supported: S16. + * @param[out] output_x Destination tensor for the X gradient. Data type supported: S16. + * @param[out] output_y Destination tensor for the Y gradient. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. 
False if it's replicate or constant. + */ + void configure(ITensor *input_x, ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + ITensor *_input_x; /**< X input (X output of the hor pass) */ + ITensor *_input_y; /**< Y input (Y output of the hor pass) */ + ITensor *_output_x; /**< X output of sobel */ + ITensor *_output_y; /**< Y output of sobel */ + bool _run_sobel_x; /**< Do we need to run sobel X? */ + bool _run_sobel_y; /**< Do we need to run sobel Y? */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NESOBEL5x5KERNEL_H */ diff --git a/src/core/NEON/kernels/NESobel7x7Kernel.cpp b/src/core/NEON/kernels/NESobel7x7Kernel.cpp index 779d67a044..835b333a10 100644 --- a/src/core/NEON/kernels/NESobel7x7Kernel.cpp +++ b/src/core/NEON/kernels/NESobel7x7Kernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NESobel7x7Kernel.h" +#include "src/core/NEON/kernels/NESobel7x7Kernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NESobel7x7Kernel.h b/src/core/NEON/kernels/NESobel7x7Kernel.h new file mode 100644 index 0000000000..c5a3899bab --- /dev/null +++ b/src/core/NEON/kernels/NESobel7x7Kernel.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NESOBEL7x7KERNEL_H +#define ARM_COMPUTE_NESOBEL7x7KERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor. 
+ * + */ +class NESobel7x7HorKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NESobel7x7HorKernel"; + } + /** Default constructor */ + NESobel7x7HorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel7x7HorKernel(const NESobel7x7HorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel7x7HorKernel &operator=(const NESobel7x7HorKernel &) = delete; + /** Allow instances of this class to be moved */ + NESobel7x7HorKernel(NESobel7x7HorKernel &&) = default; + /** Allow instances of this class to be moved */ + NESobel7x7HorKernel &operator=(NESobel7x7HorKernel &&) = default; + /** Default destructor */ + ~NESobel7x7HorKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S32. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S32. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + const ITensor *_input; /**< Input tensor */ + ITensor *_output_x; /**< X output of horizontal pass */ + ITensor *_output_y; /**< Y output of horizontal pass */ + bool _run_sobel_x; /**< Do we need to run Sobel X? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y? */ + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel to run the vertical pass of 7x7 Sobel Y filter on a tensor. 
+ * +*/ +class NESobel7x7VertKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NESobel7x7VertKernel"; + } + /** Default constructor */ + NESobel7x7VertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel7x7VertKernel(const NESobel7x7VertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel7x7VertKernel &operator=(const NESobel7x7VertKernel &) = delete; + /** Allow instances of this class to be moved */ + NESobel7x7VertKernel(NESobel7x7VertKernel &&) = default; + /** Allow instances of this class to be moved */ + NESobel7x7VertKernel &operator=(NESobel7x7VertKernel &&) = default; + /** Default destructor */ + ~NESobel7x7VertKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @note At least one of output_x or output_y must be set + * @note If output_x is set then input_x must be set too + * @note If output_y is set then input_y must be set too + * + * @param[in] input_x (Optional) Input for X (X output of hor pass). Data type supported: S32. + * @param[in] input_y (Optional) Input for Y (Y output of hor pass). Data type supported: S32. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S32. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S32. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ITensor *input_x, const ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + const ITensor *_input_x; /**< X input (X output of the hor pass) */ + const ITensor *_input_y; /**< Y input (Y output of the hor pass) */ + ITensor *_output_x; /**< X output of sobel */ + ITensor *_output_y; /**< Y output of sobel */ + bool _run_sobel_x; /**< Do we need to run sobel X? */ + bool _run_sobel_y; /**< Do we need to run sobel Y? */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NESOBEL7x7KERNEL_H */ diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp index 13f0a54275..97797cefde 100644 --- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp +++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" +#include "src/core/NEON/kernels/NESoftmaxLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.h b/src/core/NEON/kernels/NESoftmaxLayerKernel.h new file mode 100644 index 0000000000..adc2e57258 --- /dev/null +++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.h @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H +#define ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the identifying the max value of 1D Logits */ +class NELogits1DMaxKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NELogits1DMaxKernel"; + } + /** Default constructor */ + NELogits1DMaxKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogits1DMaxKernel(const NELogits1DMaxKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogits1DMaxKernel &operator=(const NELogits1DMaxKernel &) = delete; + /** Allow instances of this class to be moved */ + NELogits1DMaxKernel(NELogits1DMaxKernel &&) = default; + /** Allow instances of this class to be moved */ + NELogits1DMaxKernel &operator=(NELogits1DMaxKernel &&) = default; + /** Default destructor */ + ~NELogits1DMaxKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor. Data types supported: same as @p input + */ + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DMaxKernel + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] output Destination tensor. 
Data types supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + using Logits1DMaxFunction = void(const ITensor &in, ITensor &out, const Window &window); + +private: + Logits1DMaxFunction *_func; + BorderSize _border_size; +}; + +/** Interface for softmax computation for QASYMM8 with pre-computed max. */ +template <bool IS_LOG = false> +class NELogits1DSoftmaxKernel : public INEKernel +{ +public: + const char *name() const override + { + if(IS_LOG) + { + return "NELogits1DSoftmaxKernel"; + } + else + { + return "NELogits1DLogSoftmaxKernel"; + } + } + /** Default constructor */ + NELogits1DSoftmaxKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogits1DSoftmaxKernel(const NELogits1DSoftmaxKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogits1DSoftmaxKernel &operator=(const NELogits1DSoftmaxKernel &) = delete; + /** Allow instances of this class to be moved */ + NELogits1DSoftmaxKernel(NELogits1DSoftmaxKernel &&) = default; + /** Allow instances of this class to be moved */ + NELogits1DSoftmaxKernel &operator=(NELogits1DSoftmaxKernel &&) = default; + /** Default destructor */ + ~NELogits1DSoftmaxKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] max Max values tensor. Same shape as input with dimension 0 set to 1. + * Data types supported: same as @p input. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] beta A scaling factor for the exponent. + * + * @param tmp Auxiliary tensor. Must be type F32 and same shape as the input. 
+ */ + void configure(const ITensor *input, const ITensor *max, ITensor *output, const float beta, ITensor *tmp); + /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DSoftmaxKernel + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] max Max values tensor info. Same shape as input with dimension 0 set to 1. + * Data types supported: same as @p input. + * @param[in] output Destination tensor info. Data types supported: same as @p input. + * @param[in] beta A scaling factor for the exponent. + * @param[in] tmp Tensor info of auxiliary. Must be type F32 and same shape as the input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *max, + const ITensorInfo *output, const float beta, const ITensorInfo *tmp); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + using LogitsSoftmaxFunction = void(const ITensor &in, const ITensor &max, void *const tmp, ITensor &out, const float beta, + const Window &window); + + LogitsSoftmaxFunction *_func; + const ITensor *_input; + const ITensor *_max; + ITensor *_output; + float _beta; + ITensor *_tmp; //Temporary. Used internally +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp b/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp index 3293466979..27b3154298 100644 --- a/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp +++ b/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h" +#include "src/core/NEON/kernels/NESpaceToBatchLayerKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NESpaceToBatchLayerKernel.h b/src/core/NEON/kernels/NESpaceToBatchLayerKernel.h new file mode 100644 index 0000000000..627724580b --- /dev/null +++ b/src/core/NEON/kernels/NESpaceToBatchLayerKernel.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H +#define ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declaration +class ITensor; + +/** Interface for the space to batch kernel */ +class NESpaceToBatchLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NESpaceToBatchLayerKernel"; + } + /** Default constructor */ + NESpaceToBatchLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESpaceToBatchLayerKernel(const NESpaceToBatchLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESpaceToBatchLayerKernel &operator=(const NESpaceToBatchLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NESpaceToBatchLayerKernel(NESpaceToBatchLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NESpaceToBatchLayerKernel &operator=(NESpaceToBatchLayerKernel &&) = default; + /** Default destructor */ + ~NESpaceToBatchLayerKernel() = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 + * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(const ITensor *input, const ITensor *block_shape, const ITensor *paddings, ITensor *output); + /** Initialise the kernel's input and output. (Static block shape and paddings) + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. 
+ * @param[in] padding_left The left padding of the output tensor. + * @param[in] padding_right The right padding of the output tensor. + * @param[out] output Tensor output. Data types supported: same as @p input + */ + void configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayerKernel + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 + * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 + * @param[in] output Tensor output. Data types supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayerKernel (Static block shape and paddings) + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[in] block_shape_x Block shape x value. + * @param[in] block_shape_y Block shape y value. + * @param[in] padding_left The left padding of the output tensor. + * @param[in] padding_right The right padding of the output tensor. + * @param[in] output Tensor output. 
Data types supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; /**< Source tensor */ + const ITensor *_block_shape; /**< Block shape tensor */ + const ITensor *_paddings; /**< Paddings tensor */ + ITensor *_output; /**< Destination tensor */ + DataLayout _data_layout; /**< Data layout to be used at run-time */ + + Size2D _padding_left; + int _block_shape_x; + int _block_shape_y; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp b/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp index 7c9cc4996b..7687c50c40 100644 --- a/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp +++ b/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h" +#include "src/core/NEON/kernels/NESpaceToDepthLayerKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NESpaceToDepthLayerKernel.h b/src/core/NEON/kernels/NESpaceToDepthLayerKernel.h new file mode 100644 index 0000000000..953b68a401 --- /dev/null +++ b/src/core/NEON/kernels/NESpaceToDepthLayerKernel.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H +#define ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the space to depth kernel */ +class NESpaceToDepthLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NESpaceToDepthLayerKernel"; + } + /** Default constructor */ + NESpaceToDepthLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESpaceToDepthLayerKernel(const NESpaceToDepthLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESpaceToDepthLayerKernel &operator=(const NESpaceToDepthLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NESpaceToDepthLayerKernel(NESpaceToDepthLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NESpaceToDepthLayerKernel &operator=(NESpaceToDepthLayerKernel &&) = default; + /** Default destructor */ + ~NESpaceToDepthLayerKernel() = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. + * @param[out] output Tensor output. Data types supported: same as @p input + * @param[in] block_shape Block shape value + */ + void configure(const ITensor *input, ITensor *output, int32_t block_shape); + /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToDepthLayerKernel + * + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. + * @param[in] output Tensor output info. 
Data types supported: same as @p input + * @param[in] block_shape Block shape value + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; /**< Source tensor */ + ITensor *_output; /**< Destination tensor */ + int32_t _block_shape; /**< Block shape */ + DataLayout _data_layout; /**< Data layout of the operation */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEStackLayerKernel.cpp b/src/core/NEON/kernels/NEStackLayerKernel.cpp index ad7f1b1300..55170a169a 100644 --- a/src/core/NEON/kernels/NEStackLayerKernel.cpp +++ b/src/core/NEON/kernels/NEStackLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEStackLayerKernel.h" +#include "src/core/NEON/kernels/NEStackLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEStackLayerKernel.h b/src/core/NEON/kernels/NEStackLayerKernel.h new file mode 100644 index 0000000000..9b0a039b88 --- /dev/null +++ b/src/core/NEON/kernels/NEStackLayerKernel.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef ARM_COMPUTE_NESTACKLAYERKERNEL_H +#define ARM_COMPUTE_NESTACKLAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to stacks a rank-R tensor into one with rank-(R+1) along the axis dimension.*/ +class NEStackLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEStackLayerKernel"; + } + /** Default constructor */ + NEStackLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEStackLayerKernel(const NEStackLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEStackLayerKernel &operator=(const NEStackLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEStackLayerKernel(NEStackLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEStackLayerKernel &operator=(NEStackLayerKernel &&) = default; + /** Default destructor */ + ~NEStackLayerKernel() = default; + /** Initialise the kernel's inputs and output + * + * @note Supported input tensor rank: up to 4 + * + * @param[in] input Input tensor. Data types supported: All + * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. + * @param[in] idx_input Index of the input tensor in the list of tensors to stack. + * All tensors in the list must have the same shape + * @param[in] num_tensors Number of tensors to stack + * @param[out] output Output tensor. Data types supported: Same as @p input. + * + */ + void configure(const ITensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEStackLayerKernel + * + * @note Supported input tensor rank: up to 4 + * + * @param[in] input Input tensor info. 
Data types supported: All + * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. + * @param[in] idx_input Index of the input tensor in the list of tensors to stack + * All tensors in the list must have the same shape + * @param[in] num_tensors Number of tensors to stack + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output); + + // Inherited methods overridden + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; + unsigned int _axis; + unsigned int _idx_input; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NESTACKLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEStridedSliceKernel.cpp b/src/core/NEON/kernels/NEStridedSliceKernel.cpp index 13b2cb5a10..ac04a1076d 100644 --- a/src/core/NEON/kernels/NEStridedSliceKernel.cpp +++ b/src/core/NEON/kernels/NEStridedSliceKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h" +#include "src/core/NEON/kernels/NEStridedSliceKernel.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEStridedSliceKernel.h b/src/core/NEON/kernels/NEStridedSliceKernel.h new file mode 100644 index 0000000000..9ce517417d --- /dev/null +++ b/src/core/NEON/kernels/NEStridedSliceKernel.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H +#define ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +#include <cstdint> + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the kernel to perform tensor strided slicing */ +class NEStridedSliceKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEStridedSliceKernel"; + } + /** Default constructor */ + NEStridedSliceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEStridedSliceKernel(const NEStridedSliceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEStridedSliceKernel &operator=(const NEStridedSliceKernel &) = delete; + /** Allow instances of this class to be moved */ + NEStridedSliceKernel(NEStridedSliceKernel &&) = default; + /** Allow instances of this class to be moved */ + NEStridedSliceKernel &operator=(NEStridedSliceKernel &&) = default; + /** Default destructor */ + ~NEStridedSliceKernel() = default; + /** Configure kernel + * + * @note Supported tensor rank: up to 4 + * + * @param[in] input Source tensor info. Data type supported: All + * @param[out] output Destination tensor info. Data type supported: Same as @p input + * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. 
+ * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. + * A slice of size 1 starting from starts[i] in the dimension must be preserved. + */ + void configure(const ITensorInfo *input, ITensorInfo *output, + const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, + int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); + + /** Static function to check if given info will lead to a valid configuration of @ref NEStridedSliceKernel + * + * @note Supported tensor rank: up to 4 + * + * @param[in] input Source tensor info. Data type supported: All + * @param[in] output Destination tensor info. Data type supported: Same as @p input + * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. + * A slice of size 1 starting from starts[i] in the dimension must be preserved. 
+ */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, + int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + +private: + Coordinates _starts_abs; /**< Absolute start coordinates */ + Coordinates _final_strides; /**< Final strides */ + int32_t _shrink_mask; /**< Shrink axis mask */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H */ diff --git a/src/core/NEON/kernels/NETableLookupKernel.cpp b/src/core/NEON/kernels/NETableLookupKernel.cpp index d26a0eedb5..19ce7f0352 100644 --- a/src/core/NEON/kernels/NETableLookupKernel.cpp +++ b/src/core/NEON/kernels/NETableLookupKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h" +#include "src/core/NEON/kernels/NETableLookupKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NETableLookupKernel.h b/src/core/NEON/kernels/NETableLookupKernel.h new file mode 100644 index 0000000000..7937999b46 --- /dev/null +++ b/src/core/NEON/kernels/NETableLookupKernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NETABLELOOKUPKERNEL_H +#define ARM_COMPUTE_NETABLELOOKUPKERNEL_H + +#include "src/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; +class ILut; + +/** Interface for the kernel to perform table lookup calculations. 
*/ +class NETableLookupKernel : public INESimpleKernel +{ +public: + const char *name() const override + { + return "NETableLookupKernel"; + } + /** Default constructor */ + NETableLookupKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETableLookupKernel(const NETableLookupKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETableLookupKernel &operator=(const NETableLookupKernel &) = delete; + /** Allow instances of this class to be moved */ + NETableLookupKernel(NETableLookupKernel &&) = default; + /** Allow instances of this class to be moved */ + NETableLookupKernel &operator=(NETableLookupKernel &&) = default; + /** Default destructor */ + ~NETableLookupKernel() = default; + /** Initialise the kernel's input, lut and output. + * + * @param[in] input An input tensor. Data types supported: U8/S16. + * @param[in] lut The input LUT. + * @param[out] output The output tensor. Data types supported: same as @p input + */ + void configure(const ITensor *input, const ILut *lut, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Perform table lookup on a given window. + * + * @param window Region on which to execute the kernel. + */ + template <class T> + void tableLookup(const Window &window); + /** Common signature for all the specialised lut functions + * + * @param[in] window Region on which to execute the kernel. 
+ */ + using TableLookupFunction = void (NETableLookupKernel::*)(const Window &window); + /** Sub function to use for the particular tensor types passed to configure() */ + TableLookupFunction _func; + const ILut *_lut; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NETABLELOOKUPKERNEL_H */ diff --git a/src/core/NEON/kernels/NEThresholdKernel.cpp b/src/core/NEON/kernels/NEThresholdKernel.cpp index aad440b120..183bb8db5c 100644 --- a/src/core/NEON/kernels/NEThresholdKernel.cpp +++ b/src/core/NEON/kernels/NEThresholdKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEThresholdKernel.h" +#include "src/core/NEON/kernels/NEThresholdKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEThresholdKernel.h b/src/core/NEON/kernels/NEThresholdKernel.h new file mode 100644 index 0000000000..6b3b3866b0 --- /dev/null +++ b/src/core/NEON/kernels/NEThresholdKernel.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NETHRESHOLDKERNEL_H +#define ARM_COMPUTE_NETHRESHOLDKERNEL_H + +#include "arm_compute/core/KernelDescriptors.h" +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the thresholding kernel */ +class NEThresholdKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEThresholdKernel"; + } + /** Default constructor */ + NEThresholdKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEThresholdKernel(const NEThresholdKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEThresholdKernel &operator=(const NEThresholdKernel &) = delete; + /** Allow instances of this class to be moved */ + NEThresholdKernel(NEThresholdKernel &&) = default; + /** Allow instances of this class to be moved */ + NEThresholdKernel &operator=(NEThresholdKernel &&) = default; + /** Default destructor */ + ~NEThresholdKernel() = default; + /** Initialise the kernel's input, output and threshold parameters. + * + * @param[in] input An input tensor. Data type supported: U8 + * @param[out] output The output tensor. Data type supported: U8. + * @param[in] info Threshold kernel descriptor + */ + void configure(const ITensor *input, ITensor *output, const ThresholdKernelInfo &info); + /** Static function to check if given info will lead to a valid configuration of @ref NEThresholdKernel + * + * @param[in] input Input tensor info. Data type supported: U8 + * @param[in] output Output tensor info. 
Data type supported: U8 + * @param[in] info Threshold kernel descriptor + * + * @return A status containing an error code in case of failure + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ThresholdKernelInfo &info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** run binary thresholding on the given window */ + void run_binary(const Window &window); + /** run range thresholding on the given window */ + void run_range(const Window &window); + + void (NEThresholdKernel::*_func)(const Window &window); + + const ITensor *_input; /**< Input */ + ITensor *_output; /**< Output */ + ThresholdKernelInfo _info; /**< Threshold descriptor */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NETHRESHOLDKERNEL_H */ diff --git a/src/core/NEON/kernels/NETileKernel.cpp b/src/core/NEON/kernels/NETileKernel.cpp index 99651c8b8a..94256dc12d 100644 --- a/src/core/NEON/kernels/NETileKernel.cpp +++ b/src/core/NEON/kernels/NETileKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NETileKernel.h" +#include "src/core/NEON/kernels/NETileKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NETileKernel.h b/src/core/NEON/kernels/NETileKernel.h new file mode 100644 index 0000000000..8dfea8bc2f --- /dev/null +++ b/src/core/NEON/kernels/NETileKernel.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NETILEKERNEL_H +#define ARM_COMPUTE_NETILEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a tile operation */ +class NETileKernel : public INEKernel +{ +public: + /** Default constructor */ + NETileKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NETileKernel(const NETileKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ + NETileKernel &operator=(const NETileKernel &) = delete; + /** Allow instances of this class to be moved */ + NETileKernel(NETileKernel &&) = default; + /** Allow instances of this class to be moved */ + NETileKernel &operator=(NETileKernel &&) = default; + /** Default destructor */ + ~NETileKernel() = default; + const char *name() const override + { + return "NETileKernel"; + } + /** Set the source, destination of the kernel + * + * @param[in] input Source tensor. Data type supported: All. + * @param[out] output Destination tensor. Same as @p input + * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. + */ + void configure(const ITensor *input, ITensor *output, const Multiples &multiples); + /** Static function to check if given info will lead to a valid configuration of @ref NETileKernel + * + * @param[in] input Source tensor info. Data type supported: All. + * @param[in] output Destination tensor info. Same as @p input + * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Multiples &multiples); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NETILEKERNEL_H */ diff --git a/src/core/NEON/kernels/NETransposeKernel.cpp b/src/core/NEON/kernels/NETransposeKernel.cpp index 6037810a44..134831be4c 100644 --- a/src/core/NEON/kernels/NETransposeKernel.cpp +++ b/src/core/NEON/kernels/NETransposeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NETransposeKernel.h" +#include "src/core/NEON/kernels/NETransposeKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NETransposeKernel.h b/src/core/NEON/kernels/NETransposeKernel.h new file mode 100644 index 0000000000..73d2098fb3 --- /dev/null +++ b/src/core/NEON/kernels/NETransposeKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NETRANSPOSEKERNEL_H +#define ARM_COMPUTE_NETRANSPOSEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel which transposes the elements of a matrix. 
+ * + * [width, height, batch] -> [height, width, batch] + * + */ +class NETransposeKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NETransposeKernel"; + } + /** Default constructor */ + NETransposeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETransposeKernel(const NETransposeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETransposeKernel &operator=(const NETransposeKernel &) = delete; + /** Allow instances of this class to be moved */ + NETransposeKernel(NETransposeKernel &&) = default; + /** Allow instances of this class to be moved */ + NETransposeKernel &operator=(NETransposeKernel &&) = default; + /** Default destructor */ + ~NETransposeKernel() = default; + + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor. Data type supported: Same as @p input + */ + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NETransposeKernel + * + * @param[in] input Input tensor. Data types supported: All + * @param[in] output Output tensor. Data type supported: Same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the transpose functions + * + * @param[in] input An input tensor. Data types supported: All + * @param[out] output The output tensor. Data type supported: same as @p input + * @param[in] window Region on which to execute the kernel. 
+ */ + using TransposeFunction = void(const ITensor *input, ITensor *output, const Window &window); + /** Transpose function to use for the particular tensor types passed to configure() */ + TransposeFunction *_func; + const ITensor *_input; + ITensor *_output; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NETRANSPOSEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEUpsampleLayerKernel.cpp b/src/core/NEON/kernels/NEUpsampleLayerKernel.cpp index 129c83c695..cbdec50a42 100644 --- a/src/core/NEON/kernels/NEUpsampleLayerKernel.cpp +++ b/src/core/NEON/kernels/NEUpsampleLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h" +#include "src/core/NEON/kernels/NEUpsampleLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEUpsampleLayerKernel.h b/src/core/NEON/kernels/NEUpsampleLayerKernel.h new file mode 100644 index 0000000000..7ff797a9f8 --- /dev/null +++ b/src/core/NEON/kernels/NEUpsampleLayerKernel.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H +#define ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the Upsample layer kernel.*/ +class NEUpsampleLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEUpsampleLayerKernel"; + } + /** Default constructor */ + NEUpsampleLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEUpsampleLayerKernel(const NEUpsampleLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEUpsampleLayerKernel &operator=(const NEUpsampleLayerKernel &) = delete; + /** Default Move Constructor. */ + NEUpsampleLayerKernel(NEUpsampleLayerKernel &&) = default; + /** Default move assignment operator */ + NEUpsampleLayerKernel &operator=(NEUpsampleLayerKernel &&) = default; + /** Default destructor */ + ~NEUpsampleLayerKernel() = default; + /** Set the input output tensors. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] info Contains stride information described in @ref Size2D. + * @param[in] policy Defines the policy to fill the intermediate pixels. 
+ * + */ + void configure(const ITensor *input, ITensor *output, const Size2D &info, const InterpolationPolicy policy); + /** Static function to check if given info will lead to a valid configuration of @ref NEUpsampleLayerKernel + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] output Destination tensor info. Data types supported: same as @p input. + * @param[in] info Contains stride information described in @ref Size2D. + * @param[in] policy Defines the policy to fill the intermediate pixels. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info, const InterpolationPolicy policy); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Function to run upsample layer (NCHW) + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void upsample_nchw(const Window &window); + /** Function to run upsample layer (NHWC) + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void upsample_nhwc(const Window &window); + + using UpsampleFunctionPtr = void (NEUpsampleLayerKernel::*)(const Window &window); + +private: + UpsampleFunctionPtr _func; + const ITensor *_input; + ITensor *_output; + Size2D _info; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEWarpKernel.cpp b/src/core/NEON/kernels/NEWarpKernel.cpp index 891304f02c..1ae076153b 100644 --- a/src/core/NEON/kernels/NEWarpKernel.cpp +++ b/src/core/NEON/kernels/NEWarpKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEWarpKernel.h" +#include "src/core/NEON/kernels/NEWarpKernel.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" diff --git a/src/core/NEON/kernels/NEWarpKernel.h b/src/core/NEON/kernels/NEWarpKernel.h new file mode 100644 index 0000000000..2c4cb55e3c --- /dev/null +++ b/src/core/NEON/kernels/NEWarpKernel.h @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2016-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NEWARPKERNEL_H +#define ARM_COMPUTE_NEWARPKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +#include <array> +#include <cstdint> +namespace arm_compute +{ +class ITensor; + +/** Common interface for warp affine and warp perspective */ +class INEWarpKernel : public INEKernel +{ +public: + /** Default constructor */ + INEWarpKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INEWarpKernel(const INEWarpKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INEWarpKernel &operator=(const INEWarpKernel &) = delete; + /** Allow instances of this class to be moved */ + INEWarpKernel(INEWarpKernel &&) = default; + /** Allow instances of this class to be moved */ + INEWarpKernel &operator=(INEWarpKernel &&) = default; + /** Default destructor */ + ~INEWarpKernel() = default; + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] matrix The perspective or affine matrix to use. Must be 2x3 for affine and 3x3 for perspective of type float. + * The matrix argument requires 9 values, for the affine case the last 3 values are ignored. + * @param[in] border_mode Strategy to use for borders + * @param[in] constant_border_value Constant value used for filling the border.
*/ + virtual void configure(const ITensor *input, ITensor *output, const std::array<float, 9> &matrix, BorderMode border_mode, uint8_t constant_border_value); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + + // Inherited methods overridden: + BorderSize border_size() const override; + +protected: + /** function to perform warp affine or warp perspective on the given window when border mode == UNDEFINED + * + * @param[in] window Region on which to execute the kernel + */ + virtual void warp_undefined(const Window &window) = 0; + /** function to perform warp affine or warp perspective on the given window when border mode == CONSTANT + * + * @param[in] window Region on which to execute the kernel + */ + virtual void warp_constant(const Window &window) = 0; + /** function to perform warp affine or warp perspective on the given window when border mode == REPLICATE + * + * @param[in] window Region on which to execute the kernel + */ + virtual void warp_replicate(const Window &window) = 0; + /** Common signature for all the specialised warp functions + * + * @param[in] window Region on which to execute the kernel. + */ + void (INEWarpKernel::*_func)(const Window &window); + + const ITensor *_input; /**< Input Tensor */ + ITensor *_output; /**< Output Tensor */ + uint8_t _constant_border_value; /**< Constant value used for filling the border. This value is used for those pixels out of the ROI when the border mode is CONSTANT */ + std::array<float, 9> _matrix; /**< The affine or perspective matrix. Must be 2x3 for warp affine or 3x3 for warp perspective of type float.
*/ +}; + +/** Template interface for the kernel to compute warp affine + * + */ +template <InterpolationPolicy interpolation> +class NEWarpAffineKernel : public INEWarpKernel +{ +private: + const char *name() const override + { + return "NEWarpAffineKernel"; + } + // Inherited methods overridden: + void warp_undefined(const Window &window) override; + void warp_constant(const Window &window) override; + void warp_replicate(const Window &window) override; +}; + +/** Template interface for the kernel to compute warp perspective + * + */ +template <InterpolationPolicy interpolation> +class NEWarpPerspectiveKernel : public INEWarpKernel +{ +private: + const char *name() const override + { + return "NEWarpPerspectiveKernel"; + } + // Inherited methods overridden: + void warp_undefined(const Window &window) override; + void warp_constant(const Window &window) override; + void warp_replicate(const Window &window) override; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEWARPKERNEL_H */ diff --git a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp index c7fa2d2365..118655b755 100644 --- a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp +++ b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" +#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Validate.h" diff --git a/src/core/NEON/kernels/NEWeightsReshapeKernel.h b/src/core/NEON/kernels/NEWeightsReshapeKernel.h new file mode 100644 index 0000000000..9678b79fda --- /dev/null +++ b/src/core/NEON/kernels/NEWeightsReshapeKernel.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2017-2020 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H +#define ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform reshaping on the weights used by convolution and locally connected layer + * + * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels. + * In combination with the @ref NEIm2ColKernel can transform a convolution to a matrix multiplication. 
+ * + * For example assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have: + * @f[ + * \left( \begin{array}{ccc} + * a000 & a001 & a002 \\ + * a010 & a011 & a012 \\ + * a020 & a021 & a022 \\ + * \end{array} \right) + * \left( \begin{array}{ccc} + * a100 & a101 & a102 \\ + * a110 & a111 & a112 \\ + * a120 & a121 & a122 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccc} + * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\ + * \end{array} \right) + * @f] + */ +class NEWeightsReshapeKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEWeightsReshapeKernel"; + } + /** Constructor.*/ + NEWeightsReshapeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEWeightsReshapeKernel(const NEWeightsReshapeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEWeightsReshapeKernel &operator=(const NEWeightsReshapeKernel &) = delete; + /** Allow instances of this class to be moved */ + NEWeightsReshapeKernel(NEWeightsReshapeKernel &&) = default; + /** Allow instances of this class to be moved */ + NEWeightsReshapeKernel &operator=(NEWeightsReshapeKernel &&) = default; + /** Default destructor */ + ~NEWeightsReshapeKernel() = default; + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, + * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. + * Data types supported: All + * @param[in] bias The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with + * dimensions [OFM, num_patches] if unshared. 
Data types supported: Same as @p input + * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. + * @param[out] output The output tensor. Data types supported: Same as @p input + */ + void configure(const ITensor *input, const ITensor *bias, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEWeightsReshapeKernel + * + * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, + * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. + * Data types supported: All + * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with + * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input + * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. + * @param[in] output The output tensor. Should be a 2D Tensor. Data types supported: Same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + const ITensor *_bias; + ITensor *_output; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp index 90afbd6a19..b5afeed1f6 100644 --- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h" +#include "src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h new file mode 100644 index 0000000000..81b4cbed9e --- /dev/null +++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H +#define ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Interface for the width concatenate kernel. 
+ * The input tensor will be concatenated into the output tensor. + */ +class NEWidthConcatenateLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEWidthConcatenateLayerKernel"; + } + /** Default constructor */ + NEWidthConcatenateLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEWidthConcatenateLayerKernel(const NEWidthConcatenateLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEWidthConcatenateLayerKernel &operator=(const NEWidthConcatenateLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEWidthConcatenateLayerKernel(NEWidthConcatenateLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEWidthConcatenateLayerKernel &operator=(NEWidthConcatenateLayerKernel &&) = default; + /** Default destructor */ + ~NEWidthConcatenateLayerKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor info. Data types supported: All + * @param[in] width_offset The offset on the X axis. + * @param[in,out] output Output tensor info. Data types supported: Same as @p input. + */ + void configure(const ITensorInfo *input, unsigned int width_offset, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEWidthConcatenateLayerKernel + * + * @param[in] input Input tensor info. Data types supported: All + * @param[in] width_offset The offset on the X axis. + * @param[in] output Output tensor info. Data types supported: Same as @p input. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + +private: + unsigned int _width_offset; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h index bf5d77fc43..2b87e512dc 100644 --- a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h +++ b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h @@ -24,7 +24,7 @@ #ifndef ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H #define ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H -#include "arm_compute/core/NEON/INEKernel.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/NEON/kernels/convolution/common/convolution.hpp" #include "src/core/NEON/kernels/convolution/common/tensor.hpp" diff --git a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp index 48c0616b35..33bcc20d39 100644 --- a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp +++ b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h" +#include "src/core/NEON/kernels/NEYOLOLayerKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" diff --git a/src/core/NEON/kernels/NEYOLOLayerKernel.h b/src/core/NEON/kernels/NEYOLOLayerKernel.h new file mode 100644 index 0000000000..806cf9cc09 --- /dev/null +++ b/src/core/NEON/kernels/NEYOLOLayerKernel.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEYOLOLAYERKERNEL_H +#define ARM_COMPUTE_NEYOLOLAYERKERNEL_H + +#include "src/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the YOLO layer kernel. 
*/ +class NEYOLOLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEYOLOLayerKernel"; + } + /** Constructor */ + NEYOLOLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEYOLOLayerKernel(const NEYOLOLayerKernel &) = delete; + /** Default move constructor */ + NEYOLOLayerKernel(NEYOLOLayerKernel &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEYOLOLayerKernel &operator=(const NEYOLOLayerKernel &) = delete; + /** Default move assignment operator */ + NEYOLOLayerKernel &operator=(NEYOLOLayerKernel &&) = default; + /** Default destructor */ + ~NEYOLOLayerKernel() = default; + /** Set the input and output tensor. + * + * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place + * + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] act_info Activation layer parameters. + * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) + */ + void configure(ITensor *input, ITensor *output, const ActivationLayerInfo &act_info, int32_t num_classes); + /** Static function to check if given info will lead to a valid configuration of @ref NEYOLOLayerKernel + * + * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result + * of the activation function. Data types supported: F16/F32. + * @param[in] output Destination tensor info. Data type supported: same as @p input + * @param[in] act_info Activation layer information. 
+ * @param[in]  num_classes Number of classes to activate (must be submultiple of @p input channels) + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Function to run YOLO layer + * + * @param[in] window Region on which to execute the kernel. + */ + template <typename T> + void yolo_layer_nchw(const Window &window); + /** Function to run YOLO layer on tensors with NHWC format + * + * @param[in] window Region on which to execute the kernel. + */ + template <typename T> + void yolo_layer_nhwc(const Window &window); + /** Common signature for all the yolo layer functions + * + * @param[in] window Region on which to execute the kernel. + */ + using YOLOFunctionPtr = void (NEYOLOLayerKernel::*)(const Window &window); + +private: + YOLOFunctionPtr _func; + ITensor *_input; + ITensor *_output; + ActivationLayerInfo _act_info; + int32_t _num_classes; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_NEYOLOLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h b/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h index 030f1aad12..92c013260b 100644 --- a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h +++ b/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h @@ -24,7 +24,7 @@ #ifndef SRC_INEGEMMWRAPPERKERNEL_H #define SRC_INEGEMMWRAPPERKERNEL_H -#include "arm_compute/core/NEON/INEKernel.h" +#include "src/core/NEON/INEKernel.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h b/src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h index a2f7e3bd59..a956898403 100644 --- a/src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h +++ b/src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h @@ -24,9
+24,9 @@ #ifndef SRC_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H #define SRC_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" +#include "src/core/NEON/INEKernel.h" #include "src/core/NEON/kernels/convolution/depthwise/depthwise.hpp" diff --git a/src/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h b/src/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h index 4af82f89a8..7fcf2b1e4d 100644 --- a/src/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h +++ b/src/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h @@ -24,10 +24,10 @@ #ifndef ARM_COMPUTE_ASSEMBLY_GEMM_KERNEL_WRAPPER_KERNEL_H #define ARM_COMPUTE_ASSEMBLY_GEMM_KERNEL_WRAPPER_KERNEL_H -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_gemm_compute_iface.hpp" +#include "src/core/NEON/INEKernel.h" #include "gemm_common.hpp" -- cgit v1.2.1